In [1]:
# Day 12 — Pandas Deep Dive (With Normalization Examples)

"""
Topics to Understand Today:

1. Pandas Basics:
   - Series vs DataFrame
   - df.head(), df.info(), df.describe()

2. Row & Column Selection:
   - df['Column'], df[['Col1','Col2']]
   - df.loc[row_index, "Column"]
   - df.iloc[row_number, column_number]

3. Filtering Data:
   - df[df["Age"] > 30]
   - df[(df["City"]=="Kanpur") & (df["Score"] > 50)]

4. GroupBy & Aggregation:
   - df.groupby("Team")["Score"].mean()

5. Pivot Table:
   - pd.pivot_table(df, values='Score', index='City', columns='Team', aggfunc='mean')

6. Sorting Data:
   - df.sort_values("Age", ascending=False)

7. Merging / Joining:
   - pd.merge(df1, df2, on="id", how="inner")

---------------------------------------------------
Normalization (Important Concept in Data Cleaning):
---------------------------------------------------

# Definition:
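Normalization rescales numeric data onto a common scale so that columns with different units or magnitudes become comparable.
Min-max normalization maps values into a fixed range (e.g., 0 to 1); z-score standardization instead centers values at mean 0 with standard deviation 1 and does not bound them to a range.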

# Example Code:
"""

import pandas as pd

# Sample data: one numeric column of exam marks.
df = pd.DataFrame({'Marks': [45, 78, 88, 56, 92, 70]})

print("Original Data:")
print(df)

# Work from the raw column so both scalings derive from the same source values.
marks = df['Marks']

# Min-max normalization: subtract the minimum and divide by the range,
# so the smallest mark maps to 0 and the largest to 1.
lowest = marks.min()
highest = marks.max()
df['Marks_MinMax'] = marks.sub(lowest).div(highest - lowest)

# Z-score standardization: subtract the mean and divide by the standard
# deviation, giving a column with mean 0 and standard deviation 1.
# Note: pandas' Series.std() is the sample standard deviation (ddof=1).
average = marks.mean()
spread = marks.std()
df['Marks_ZScore'] = marks.sub(average).div(spread)

print("\nAfter Normalization:")
print(df)


Original Data:
   Marks
0     45
1     78
2     88
3     56
4     92
5     70

After Normalization:
   Marks  Marks_MinMax  Marks_ZScore
0     45      0.000000     -1.445909
1     78      0.702128      0.354657
2     88      0.914894      0.900283
3     56      0.234043     -0.845720
4     92      1.000000      1.118533
5     70      0.531915     -0.081844
