# 🐼 Pandas — Data Analysis in Python

Pandas is a fast, powerful, and flexible tool for working with:
- Tables (like spreadsheets or SQL tables)
- CSV and Excel files
- Labeled and structured data

Key objects:
- `Series`: 1D labeled array
- `DataFrame`: 2D table (like an Excel sheet or SQL table)

In [None]:
import pandas as pd

✅ Creating Series and DataFrames

In [1]:
import pandas as pd

# Series
s = pd.Series([10, 20, 30])
print(s)

# DataFrame
data = {
    "Name": ["Alice", "Enzo", "Charlie"],
    "Age": [25, 30, 35]
}
df = pd.DataFrame(data)
print(df)

0    10
1    20
2    30
dtype: int64
      Name  Age
0    Alice   25
1     Enzo   30
2  Charlie   35


✅ Reading and Writing CSV

In [2]:
import pandas as pd

data = {
    "Name": ["Alice", "Enzo", "Charlie"],
    "Age": [25, 30, 35]
}

df = pd.DataFrame(data)

print(df.head())     # First 5 rows
print(df.tail())     # Last 5 rows
print(df.shape)      # (rows, columns)
print(df.columns)    # Column names
print(df.info())     # Overview
print(df.describe()) # Stats summary

      Name  Age
0    Alice   25
1     Enzo   30
2  Charlie   35
      Name  Age
0    Alice   25
1     Enzo   30
2  Charlie   35
(3, 2)
Index(['Name', 'Age'], dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 180.0+ bytes
None
        Age
count   3.0
mean   30.0
std     5.0
min    25.0
25%    27.5
50%    30.0
75%    32.5
max    35.0


✅ Selecting Columns and Rows

In [None]:
import pandas as pd

data = {
    "Name": ["Alice", "Enzo", "Charlie"],
    "Age": [25, 30, 35]
}

df = pd.DataFrame(data)

print(df["Name"])        # Column
print(df[["Name", "Age"]])  # Multiple columns

print(df.iloc[0])        # First row (by position)
print(df.loc[0])         # First row (by label/index)

0      Alice
1       Enzo
2    Charlie
Name: Name, dtype: object
      Name  Age
0    Alice   25
1     Enzo   30
2  Charlie   35
Name    Alice
Age        25
Name: 0, dtype: object
Name    Alice
Age        25
Name: 0, dtype: object


✅ Adding / Updating / Removing Columns

In [6]:
import pandas as pd

data = {
    "Name": ["Alice", "Enzo", "Charlie"],
    "Age": [25, 30, 35]
}

df = pd.DataFrame(data)

df["Salary"] = [50000, 60000, 20000]  # Add new column
df["Age"] = df["Age"] + 1             # Update values
print(df)
df.drop("Salary", axis=1, inplace=True)  # Remove column
print(df)


      Name  Age  Salary
0    Alice   26   50000
1     Enzo   31   60000
2  Charlie   36   20000
      Name  Age
0    Alice   26
1     Enzo   31
2  Charlie   36


In [14]:
import pandas as pd

data = {
    "Name": ["Anna", "Brian", "Cecilia", "Daniel"],
    "Age": [22, 35, 28, 40],
    "Score": [88, 92, 79, 85]    
}

df = pd.DataFrame(data)
print(df)

# Filter only people with scores > 85
high_scores = df[df["Score"] > 85]
print(high_scores)

# Add a new column "Passed"
df["Passed"] = df["Score"].apply(lambda x: "Yes" if x >= 80 else "No")
print(df)

# Print the avarege score
print("\nAvarege Score:", df["Score"].mean())

      Name  Age  Score
0     Anna   22     88
1    Brian   35     92
2  Cecilia   28     79
3   Daniel   40     85
    Name  Age  Score
0   Anna   22     88
1  Brian   35     92
      Name  Age  Score Passed
0     Anna   22     88    Yes
1    Brian   35     92    Yes
2  Cecilia   28     79     No
3   Daniel   40     85    Yes

Avarege Score: 86.0
