# Import Libraries needed by this notebook

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Show the version string of the pandas module imported above.
pd.__version__

# Series Object


*   A pandas Series is a one-dimensional labelled array.
*   A Series combines the best features of a list and a dictionary.
*   A Series maintains a single collection of ordered values (i.e. a single column of data).
*   We can assign each value an identifier, which does not have to be unique.





In [None]:
# Build a labelled Series: five integers, each keyed by a single-letter label.
A = pd.Series(data=[2, 5, 7, 9, 11], index=list("abcde"))

# The Series itself, followed by its two building blocks: the values and the index.
print(type(A), A, sep="\n")
print(type(A.values), A.values, sep="\n")
print(type(A.index), A.index, sep="\n")

# Label-based access: a single label gives one value, while a label slice
# includes BOTH endpoints ("a" through "c").
print(A["a"], A["a":"c"], sep="\n")

# Create Series Object from Dictionary


*   The keys become the index labels (identifiers) and the values become the items of the Series.



In [None]:
# Two dictionaries keyed by the same letter grades: grade points and marks.
grades_dic = {
    "A": 4,
    "A-": 3.5,
    "B": 3,
    "B-": 2.5,
    "C": 2,
}
marks_dic = {
    "A": 85,
    "A-": 80,
    "B": 75,
    "B-": 70,
    "C": 65,
}

# Passing a dict to pd.Series uses the keys as the index and the values as the data.
grades_series = pd.Series(data=grades_dic)
marks_series = pd.Series(data=marks_dic)

print(grades_series, marks_series, sep="\n")

# Intro to Series Methods



*   Sum
*   Product
*   Mean
*   std - Standard Deviation

> Standard deviation is a measure of how spread out values are in a dataset.
> *   Low standard deviation → values are close to the average
> *   High standard deviation → values are widely spread out

> Think of Standard deviation as the “typical distance” of data points from the mean.

> #### Formal definition
> Standard deviation is the square root of the variance.

> #### Why use it?
> *   Understand variability in data
> *   Compare consistency (e.g., test scores, sales, sensor readings)
> *   Core to statistics, machine learning, and data analysis


> #### Rule of thumb (normal distribution)
> *   ~68% of values lie within ±1 std dev of the mean
> *   ~95% within ±2 std dev
> *   ~99.7% within ±3 std dev

In [None]:
# Three sample prices used to demonstrate the basic aggregation methods.
prices = pd.Series(data=[2.99, 4.45, 1.36])

# Each print emits a heading line followed by the corresponding result.
print("-------The Series-------", prices, sep="\n")

print("-------The sum-------", prices.sum(), sep="\n")

print("-------The product-------", prices.product(), sep="\n")

print("-------The Means-------", prices.mean(), sep="\n")

# Series.std() defaults to ddof=1, i.e. the *sample* standard deviation.
print("-------The Standard Deviation-------", prices.std(), sep="\n")

# Intro to Attributes

*   An attribute is a piece of data that lives on an object.
*   An attribute is a fact, a detail, a characteristic of the object.
*   Access an attribute with object.attribute syntax.


In [None]:
# "Smart" appears twice on purpose so that is_unique reports False.
adjectives = pd.Series(
    data=["Smart", "Handsome", "Charming", "Brilliant", "Humble", "Smart"]
)

# Attributes are read with dot syntax and no parentheses (they are not method calls).
print(
    f"No Duplication: {adjectives.is_unique}",
    f"Values: {adjectives.values}",
    f"Index: {adjectives.index}",
    sep="\n",
)

# Import Series with the pd.read_csv Function

*   Pandas ships with many different read_ functions for different types of files.
*   The read_csv function accepts many different parameters. The first one specifies the file name/path.
*   The read_csv function will import the dataset as a DataFrame, a 2-dimensional table.
*   The usecols parameter accepts a list of the column(s) to import.
*   The squeeze method converts a single-column DataFrame to a Series.


In [None]:
# One place for the CSV location used by all three reads in this cell.
pokemon_csv_path = "/content/sample_data/pokemon.csv"

# 1) Plain read: read_csv returns a DataFrame.
pokeman_data = pd.read_csv(pokemon_csv_path)
print(type(pokeman_data), pokeman_data, sep="\n")
print("------------------------------")

# 2) usecols keeps only the listed column; the result is still a DataFrame.
pokeman_data = pd.read_csv(pokemon_csv_path, usecols=["Pokemon"])
print(type(pokeman_data), pokeman_data, sep="\n")
print("------------Squeeze only works if dataframe has one column------------------")

# 3) squeeze("columns") collapses the one-column DataFrame into a Series.
pokeman_data = pd.read_csv(pokemon_csv_path, usecols=["Pokemon"]).squeeze("columns")
print(type(pokeman_data), pokeman_data, sep="\n")

In [None]:
# One place for the CSV location used by every read in this cell.
google_csv_path = "/content/sample_data/google_stock_price.csv"

# 1) Plain read: read_csv returns a DataFrame.
google_data = pd.read_csv(google_csv_path)
print(type(google_data))
print(google_data)
print("------------------------------")

# 2) usecols keeps only the listed column; the result is still a DataFrame.
google_data = pd.read_csv(google_csv_path, usecols=["Stock Price"])
print(type(google_data))
print(google_data)
print("------------Squeeze only works if dataframe has one column------------------")

# 3) squeeze("columns") collapses the one-column DataFrame into a Series.
google_data = pd.read_csv(google_csv_path, usecols=["Stock Price"]).squeeze("columns")
print(type(google_data))
print(google_data)
print("------------Head and tail------------------")

# google_data is already the squeezed Series from step 3, so the file is not
# read again here; head/tail take the first/last six entries of that Series.
print(google_data.head(6))
print(google_data.tail(6))



# Passing Series to Python's Built-In Functions



*   The len function returns the length of the Series.
*   The type function returns the type of an object.
*   The list function converts the Series to a list.
*   The dict function converts the Series to a dictionary.
*   The sorted function converts the Series to a sorted list.
*   The max function returns the largest value in the Series.
*   The min function returns the smallest value in the Series.


In [None]:
# A notebook cell only echoes its LAST bare expression, so every result is
# printed explicitly. The long collections are cut to their first five items
# to keep the output readable.
print("len:", len(pokeman_data))
print("type:", type(google_data))
print("list (first 5):", list(pokeman_data)[:5])
print("dict (first 5):", dict(pokeman_data.head()))
print("sorted (first 5):", sorted(google_data)[:5])
print("max:", max(pokeman_data))
print("min:", min(pokeman_data))

# Read CSV, mark a certain column as the index column, and sort by values/index

In [None]:
# index_col turns the "Pokemon" column into the index; squeeze('columns') then
# collapses what is left into a Series (this only happens if exactly one column remains).
pokeman_data = pd.read_csv("/content/sample_data/pokemon.csv", index_col=["Pokemon"]).squeeze('columns')
print(type(pokeman_data))
print(pokeman_data)

# sort_values / sort_index return new sorted objects; pokeman_data itself is not reordered.
print("----------------Sort By Values--------------")
print(pokeman_data.sort_values())
print("----------------Sort by Index--------------")
print(pokeman_data.sort_index())

# Extract Series Value by Index Position



*   Use the iloc accessor to extract a Series value by its index position.
*   iloc is short for "integer location".
*   Python's list slicing syntaxes (slices, slices from start, slices to end, etc.) are supported with Series objects.



In [None]:
# Load ONLY the "Pokemon" column so that squeeze('columns') really yields a Series.
# squeeze leaves a DataFrame with more than one column unchanged, in which case
# the iloc calls below would select whole rows instead of single values.
pokeman_data = pd.read_csv("/content/sample_data/pokemon.csv", usecols=["Pokemon"]).squeeze('columns')

# A list of positions returns a Series holding those entries.
print(pokeman_data.iloc[[0,3,7]])
print("------------------------------")
# A single position returns one value.
print(pokeman_data.iloc[100])
print("------------------------------")
# Slices follow Python rules: the start is included, the stop is excluded.
print(pokeman_data.iloc[0:5])
print("--------------From start till index 4----------------")
print(pokeman_data.iloc[:5])
print("--------------From 715 till end----------------")
print(pokeman_data.iloc[715:])
print("--------------Pull last value in series----------------")
print(pokeman_data.iloc[-1])
# -5:-1 stops BEFORE the last element, so it yields four values, not five.
print("--------------Pull the four values before the last (stop is exclusive)----------------")
print(pokeman_data.iloc[-5:-1])
print("--------------Pull last five in series----------------")
print(pokeman_data.iloc[-5:])

# Extract Series Value by Index Label



*   Use the loc accessor to extract a Series value by its index label.
*   Pass a list to extract multiple values by index label.
*   If one index label/position in the list does not exist, Pandas will raise an error.



In [None]:
# Index the data by Pokemon name so that values can be looked up by label.
pokemon_data = pd.read_csv(
    "/content/sample_data/pokemon.csv",
    index_col=["Pokemon"],
).squeeze("columns")
divider = "------------------------------"

# A single label.
print(pokemon_data.loc["Venusaur"])
print(divider)
# A list of labels selects several entries at once.
print(pokemon_data.loc[["Venusaur", "Charmander", "Yveltal"]])
print(divider)
# A label slice: unlike positional slices, the end label is included.
print(pokemon_data.loc["Bulbasaur":"Charmeleon"])

# The get Method on a Series


*   The get method extracts a Series value by index label. It is an alternative option to square brackets.
*   The get method's second argument sets the fallback value to return if the label/position does not exist.



In [None]:
# Index the data by Pokemon name so that get() can look values up by label.
pokemon_data = pd.read_csv(
    "/content/sample_data/pokemon.csv",
    index_col=["Pokemon"],
).squeeze("columns")
divider = "------------------------------"

# get() with a label only.
print(pokemon_data.get("Venusaur"))
print(divider)
# "Venusaur1" is a deliberately wrong label, used to show the fallback ("missed").
print(pokemon_data.get("Venusaur1", "missed"))
print(divider)
# Same call with the correct label: the fallback is not needed.
print(pokemon_data.get("Venusaur", "missed"))
print(divider)
# A list of labels, again deliberately misspelled.
print(pokemon_data.get(["Bulbasaur1", "Charmeleon1"], "missed"))
None

# Overwrite a Series Value


*   Use the loc/iloc accessor to target an index label/position, then use an equal sign to provide a new value.



In [60]:
# Reload from disk so the cell always starts from the original values.
pokemon_data = pd.read_csv(
    "/content/sample_data/pokemon.csv",
    index_col=["Pokemon"],
).squeeze("columns")
divider = "------------------------------"

# Value before the change.
print(pokemon_data.loc["Venusaur"])
print(divider)
# Assigning through .loc overwrites the entry in place (only in memory; the CSV is untouched).
pokemon_data.loc["Venusaur"] = "Electric"
print(divider)
# Value after the change.
print(pokemon_data.loc["Venusaur"])

Grass
------------------------------
------------------------------
Electric


# DataFrame

*   It is the core data structure in pandas and is used to work with tabular (row–column) data—like spreadsheets or database tables.
*   A DataFrame is a two-dimensional, labeled data structure with:
    - Rows (index)
    - Columns (labels)
    - Potentially different data types per column

In [None]:
# Column-oriented input: each keyword becomes a column name and each list
# supplies that column's values, one per row.
data = dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[25, 30, 35],
    City=["New York", "Chicago", "San Francisco"],
)

df = pd.DataFrame(data=data)
print(df)

### Add a new column

In [None]:
# Combine the two Series (built in the dictionary cell earlier) into one table;
# each Series becomes a column and rows are matched on the index labels.
D = pd.DataFrame(data={"Marks": marks_series, "Graded": grades_series})

print(D)

# Assigning to a new column name adds the column: every mark divided by 90.
# NOTE(review): 90 is the author's scaling constant — confirm what it represents.
D["Scaled Marks"] = D["Marks"].div(90)

print(D)

### Applying Masking / Filter

In [None]:
# Boolean mask: keep only the rows whose "Marks" value is strictly greater than 70.
G = D.loc[D["Marks"].gt(70)]
G

# Read CSV File downloaded from UCI Repository - Adults dataset

In [None]:
# header=None: treat the first line as data, so pandas assigns integer column labels.
adults_data = pd.read_csv("/content/sample_data/adult.data", header=None)
print(adults_data.shape, adults_data.columns, sep="\n")

# Replace the integer labels with names C0, C1, ... (one per column).
adults_data.columns = [f"C{position}" for position in range(adults_data.shape[1])]
print(adults_data.columns)

# Select specific columns by passing a list of labels ...
print(adults_data.loc[:, ["C1", "C14"]])
# ... or a contiguous run of columns with a label slice (both ends included).
print(adults_data.loc[:, "C1":"C14"])

In [None]:
# A cell only echoes its last bare expression, so a bare head() followed by
# tail() would show the tail alone. Both are printed explicitly instead.

# Read top 5 rows
print(adults_data.head())

# Read last 5 rows
print(adults_data.tail())