# Day 3: Pandas Introduction

## Import and Setup

In [None]:
import pandas as pd
import numpy as np

## Creating Series and DataFrame

In [None]:
# A Series is a one-dimensional labelled array; with no explicit index
# pandas assigns the default integer labels 0..n-1.
s = pd.Series(data=[10, 20, 30])

# A DataFrame is a two-dimensional table; each dict key becomes a column
# and each list supplies that column's values, row by row.
df = pd.DataFrame(
    data={
        'Name': ['Alice', 'Bob'],
        'Age': [25, 30],
    }
)

# Show both objects, one after the other.
print(s, df, sep='\n')

0    10
1    20
2    30
dtype: int64
    Name  Age
0  Alice   25
1    Bob   30


## Load Dataset (Iris)

In [None]:
from sklearn.datasets import load_iris

# load_iris() returns a container whose `.data` attribute holds the
# measurements and whose `.feature_names` attribute holds the matching
# column labels.
data = load_iris()

# Wrap the measurements in a DataFrame so every column carries its name.
# NOTE: this rebinds `df`, replacing the small demo frame built earlier.
df = pd.DataFrame(data=data.data, columns=data.feature_names)

# Preview the first five rows.
print(df.head(n=5))

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2


## Inspecting Data

In [None]:
# Structural overview: index range, column names, non-null counts, dtypes
# and memory usage. DataFrame.info() writes this report to stdout itself
# and returns None, so it is called bare -- wrapping it in print() would
# append a stray "None" line to the output.
df.info()

# Summary statistics (count, mean, std, min, quartiles, max) per column.
print(df.describe())

# Number of missing values in each column.
print(df.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB
None
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         150.000000        150.000000         150.000000   
mean            5.843333          3.057333           3.758000   
std             0.828066          0.435866           1.765298   
min             4.300000          2.000000           1.000000   
25%             5.100000          2.800000           1.600000   
50%             5.800000          3.000000           4.350000   
75%             6.400000          3.300000           5.100000   
max             7.900000          4.400000 

## Accessing Data

In [None]:
# Position-based lookup: .iloc[0] returns the first row as a Series.
first_row = df.iloc[0]
print(first_row)

# Label-based lookup: .loc takes a row label and a list of column labels,
# here the row labelled 0 restricted to the two sepal measurements.
sepal_columns = ['sepal length (cm)', 'sepal width (cm)']
print(df.loc[0, sepal_columns])

sepal length (cm)    5.1
sepal width (cm)     3.5
petal length (cm)    1.4
petal width (cm)     0.2
Name: 0, dtype: float64
sepal length (cm)    5.1
sepal width (cm)     3.5
Name: 0, dtype: float64


## Add/Drop Columns

In [None]:
# Add a column: the row-wise total across all existing columns
# (axis='columns' is the named spelling of axis=1).
df['sum'] = df.sum(axis='columns')

# Drop it again: drop() returns a new frame without the column, which is
# bound back to `df`, leaving the frame with its original columns.
df = df.drop('sum', axis=1)

## Built-in Methods

In [None]:
# The column used for both the sort and the frequency count below.
sepal_length = 'sepal length (cm)'

# Mean of every column.
column_means = df.mean()
print(column_means)

# The five rows with the smallest sepal length (ascending sort, then head).
shortest_sepals = df.sort_values(sepal_length).head()
print(shortest_sepals)

# How many times each distinct sepal length occurs, most frequent first.
length_counts = df[sepal_length].value_counts()
print(length_counts)

sepal length (cm)    5.843333
sepal width (cm)     3.057333
petal length (cm)    3.758000
petal width (cm)     1.199333
dtype: float64
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
13                4.3               3.0                1.1               0.1
8                 4.4               2.9                1.4               0.2
42                4.4               3.2                1.3               0.2
38                4.4               3.0                1.3               0.2
41                4.5               2.3                1.3               0.3
sepal length (cm)
5.0    10
6.3     9
5.1     9
6.7     8
5.7     8
6.4     7
5.5     7
5.8     7
4.9     6
6.0     6
5.4     6
5.6     6
6.1     6
6.5     5
4.8     5
7.7     4
6.9     4
4.6     4
5.2     4
6.2     4
4.4     3
7.2     3
5.9     3
6.8     3
4.7     2
6.6     2
4.3     1
7.0     1
5.3     1
4.5     1
7.1     1
7.3     1
7.6     1
7.4     1
7.9     1
Name: count, dtype: int64


In [None]:
# Final cell: confirm that the notebook ran through to the end.
completion_message = "Completed Day 3 Notebook for QuantLake Internship ✅"
print(completion_message)


Completed Day 3 Notebook for QuantLake Internship ✅
