# Import Libraries needed by this notebook

In [1]:
import numpy as np
import pandas as pd

In [None]:
# Report the installed pandas version; as the cell's last expression it is echoed as the cell output.
pd.__version__

'2.2.2'

# Series Object


*   A pandas Series is a one-dimensional labelled array.
*   A Series combines the best features of a list and a dictionary.
*   A Series maintains a single collection of ordered values (i.e. a single column of data).
*   We can assign each value an identifier, which does not have to be unique.





In [None]:
# Build a labelled Series: five integers keyed by the letters "a" through "e".
A = pd.Series(data=[2, 5, 7, 9, 11], index=list("abcde"))

# For the Series itself, its underlying values, and its index:
# print the type first, then the contents.
print(type(A), A, sep="\n")
print(type(A.values), A.values, sep="\n")
print(type(A.index), A.index, sep="\n")

# Label-based access: a single label yields a scalar, while a label slice
# returns a sub-Series that includes BOTH endpoints ("a" through "c").
print(A.loc["a"])
print(A.loc["a":"c"])

<class 'pandas.core.series.Series'>
a     2
b     5
c     7
d     9
e    11
dtype: int64
<class 'numpy.ndarray'>
[ 2  5  7  9 11]
<class 'pandas.core.indexes.base.Index'>
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
2
a    2
b    5
c    7
dtype: int64


# Create Series Object from Dictionary


*   The dictionary keys become the index labels (identifiers) and the values become the items of the Series.



In [None]:
# Letter grade -> grade points. Mixing ints and floats yields a float64 Series.
grades_dic = {
    "A": 4,
    "A-": 3.5,
    "B": 3,
    "B-": 2.5,
    "C": 2,
}

# Letter grade -> marks. All values are ints, so the Series is int64.
marks_dic = {
    "A": 85,
    "A-": 80,
    "B": 75,
    "B-": 70,
    "C": 65,
}

# The dictionary keys become the index labels; the values become the data.
grades_series = pd.Series(data=grades_dic)
marks_series = pd.Series(data=marks_dic)

print(grades_series, marks_series, sep="\n")

A     4.0
A-    3.5
B     3.0
B-    2.5
C     2.0
dtype: float64
A     85
A-    80
B     75
B-    70
C     65
dtype: int64


# Intro to Series Methods



*   Sum
*   Product
*   Mean
*   std - Standard Deviation

> Standard deviation is a measure of how spread out values are in a dataset.
> *   Low standard deviation → values are close to the average
> *   High standard deviation → values are widely spread out

> Think of Standard deviation as the “typical distance” of data points from the mean.

> #### Formal definition
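> Standard deviation is the square root of the variance. Note that pandas `Series.std()` returns the *sample* standard deviation by default (`ddof=1`, i.e. it divides by N − 1 rather than N).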

> #### Why use it?
> *   Understand variability in data
> *   Compare consistency (e.g., test scores, sales, sensor readings)
> *   Core to statistics, machine learning, and data analysis


> #### Rule of thumb (normal distribution)
> *   ~68% of values lie within ±1 std dev of the mean
> *   ~95% within ±2 std dev
> *   ~99.7% within ±3 std dev

In [4]:
prices = pd.Series(data=[2.99, 4.45, 1.36])

# Each call prints a banner line followed by the matching result on the next line.
print("-------The Series-------", prices, sep="\n")

# Aggregations: each one reduces the whole Series to a single number.
print("-------The sum-------", prices.sum(), sep="\n")
print("-------The product-------", prices.product(), sep="\n")
print("-------The Means-------", prices.mean(), sep="\n")
print("-------The Standard Deviation-------", prices.std(), sep="\n")

-------The Series-------
0    2.99
1    4.45
2    1.36
dtype: float64
-------The sum-------
8.8
-------The product-------
18.095480000000006
-------The Means-------
2.9333333333333336
-------The Standard Deviation-------
1.5457791994115246


# Intro to Attributes

*   An attribute is a piece of data that lives on an object.
*   An attribute is a fact, a detail, a characteristic of the object.
*   Access an attribute with object.attribute syntax.


# DataFrame

*   The DataFrame is the core data structure in pandas and is used to work with tabular (row–column) data—like spreadsheets or database tables.
*   A DataFrame is a two-dimensional, labeled data structure with:
    - Rows (index)
    - Columns (labels)
    - Potentially different data types per column

In [None]:
# Column-oriented input: each key is a column label and each list holds
# that column's values, one entry per row.
data = dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[25, 30, 35],
    City=["New York", "Chicago", "San Francisco"],
)

# No index is given, so the rows are labelled 0, 1, 2.
df = pd.DataFrame(data=data)
print(df)

      Name  Age           City
0    Alice   25       New York
1      Bob   30        Chicago
2  Charlie   35  San Francisco


### Add a new column

In [None]:
# Combine the two Series into one table: each dict key names a column, and
# the Series' shared index labels become the row labels.
D = pd.DataFrame(
    data={
        "Marks": marks_series,
        "Graded": grades_series,
    }
)
print(D)

# Assigning to a column label that does not exist yet appends a new column;
# the division is applied element-wise to every row of "Marks".
D["Scaled Marks"] = D["Marks"] / 90
print(D)

### Applying Masking / Filter

In [None]:
# Boolean mask: the comparison yields one True/False per row, and only the
# rows flagged True (Marks strictly greater than 70) are kept.
G = D.loc[D["Marks"] > 70]
G

Unnamed: 0,Marks,Graded,Scaled Marks
A,85,4.0,0.944444
A-,80,3.5,0.888889
B,75,3.0,0.833333


# Read CSV File downloaded from UCI Repository - Adult dataset

In [None]:
# Load the UCI "Adult" data file. The file has no header row (header=None).
# In the UCI distribution the fields are separated by ", " (comma + space),
# so skipinitialspace=True is needed to keep a leading blank out of every
# text value; without it, comparisons against the plain category names fail.
adults_data = pd.read_csv(
    "/content/sample_data/adult.data",
    header=None,
    skipinitialspace=True,
)
print(adults_data.shape)
# With header=None pandas labels the columns with integers 0..N-1.
print(adults_data.columns)

# Give the columns string names: C0, C1, ..., one per column in the file.
adults_data.columns = [f"C{position}" for position in range(adults_data.shape[1])]
print(adults_data.columns)

# Select specific columns by passing a list of labels ...
print(adults_data.loc[:, ["C1", "C14"]])
# ... or a contiguous range of columns with a label slice (both ends included).
print(adults_data.loc[:, "C1":"C14"])

In [None]:
# Only the LAST expression of a cell is echoed automatically, so the first
# preview must be shown explicitly or its result is silently discarded.
# `display` is made available by IPython/Jupyter without an import.

# Read top 5 rows
display(adults_data.head())

# Read last 5 rows
adults_data.tail()