In [1]:
import pandas as pd

# Build a small demo table: one row per person, one key per column.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
    Salary=[50000, 60000, 70000, 80000],
)

# Column order follows the insertion order of the mapping above.
df = pd.DataFrame(data)
print(df)


      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000
3    David   40   80000


In [3]:
# Selecting data: indexing the frame with a single column label returns that
# column as a Series (the output below is named 'Name').
df['Name']

0      Alice
1        Bob
2    Charlie
3      David
Name: Name, dtype: object

In [4]:
# Same bracket selection on the 'Age' column; the resulting Series is int64.
df['Age']

0    25
1    30
2    35
3    40
Name: Age, dtype: int64

In [8]:
# Same bracket selection on the 'Salary' column.
df['Salary']

0    50000
1    60000
2    70000
3    80000
Name: Salary, dtype: int64

In [10]:
# .loc selects by index label: fetch the row labelled 1 as a Series whose
# entries are keyed by column name.
df.loc[1]

Name        Bob
Age          30
Salary    60000
Name: 1, dtype: object

In [11]:
# Label-based lookup again, this time for the row labelled 0.
df.loc[0]

Name      Alice
Age          25
Salary    50000
Name: 0, dtype: object

In [12]:
# .iloc selects by integer position: fetch the first row. The frame uses the
# default 0, 1, 2, ... index, so this shows the same row as df.loc[0].
df.iloc[0]

Name      Alice
Age          25
Salary    50000
Name: 0, dtype: object

In [13]:
# Position-based lookup of the third row (position 2).
df.iloc[2]

Name      Charlie
Age            35
Salary      70000
Name: 2, dtype: object

In [15]:
# conditional selection
# Comparing a column with a scalar gives one True/False per row; indexing the
# frame with that mask keeps only the rows marked True.
older_than_36 = df['Age'] > 36
df[older_than_36]

Unnamed: 0,Name,Age,Salary
3,David,40,80000


In [16]:
# Same boolean-mask filter with the comparison reversed: keep the rows whose
# Age is strictly below 36.
younger_than_36 = df['Age'] < 36
df[younger_than_36]

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,60000
2,Charlie,35,70000


In [13]:
# Dropping Rows with Missing Values


import pandas as pd
# Sample records with deliberate gaps: the last Name and the second Age are
# missing. Note that this rebinds `data` and `df` from the earlier cells.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', None],
    Age=[25, None, 35, 40],
    City=['New York', 'Los Angeles', 'Chicago', 'San Francisco'],
)

df = pd.DataFrame(data)

In [14]:
# Show the frame as built, gaps included (heading and table on separate lines).
print("Original DataFrame:", df, sep="\n")

# Drop rows with any missing values. The result is a new frame bound to
# df_cleaned.
df_cleaned = df.dropna()

print("\nDataFrame after dropping rows with missing values:", df_cleaned, sep="\n")


Original DataFrame:
      Name   Age           City
0    Alice  25.0       New York
1      Bob   NaN    Los Angeles
2  Charlie  35.0        Chicago
3     None  40.0  San Francisco

DataFrame after dropping rows with missing values:
      Name   Age      City
0    Alice  25.0  New York
2  Charlie  35.0   Chicago


In [15]:
# Boolean mask with the same shape as df: True marks a missing cell.
# Bound to a name and printed so the result is not silently discarded.
missing_mask = df.isnull()
print("Missing-value mask:")
print(missing_mask)

# Fill missing values. fillna returns a new frame, so the result must be
# captured; a bare `df.fillna(...)` call has no lasting effect.
df_filled = df.fillna('Unknown')
print("\nDataFrame with missing values filled:")
print(df_filled)

# Drop rows with missing values. Rebinding replaces the inplace=True mutation;
# the cells below read `df` and expect the incomplete rows to be gone.
df = df.dropna()

In [16]:
# Group by a column and compute mean.
# The groupby object is built once and reused for both summaries.
rows_by_city = df.groupby('City')

# Only a cell's last expression is rendered, so the mean is bound to a name
# and printed explicitly instead of being discarded.
mean_age_by_city = rows_by_city['Age'].mean()
print(mean_age_by_city)

# Multiple aggregations: mean and sum of Age plus a count of Name per city.
# Left as the final expression so the notebook renders the resulting table.
rows_by_city.agg({'Age': ['mean', 'sum'], 'Name': 'count'})


Unnamed: 0_level_0,Age,Age,Name
Unnamed: 0_level_1,mean,sum,count
City,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Chicago,35.0,35.0,1
New York,25.0,25.0,1


In [17]:
# Merging DataFrames in Pandas


import pandas as pd

# Create two DataFrames that share the 'Employee_ID' key column.
# df1: who each employee is and where they work.
df1 = pd.DataFrame(dict(
    Employee_ID=[1, 2, 3, 4],
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Department=['HR', 'Engineering', 'Finance', 'Marketing'],
))

# df2: pay and location. IDs 3 and 4 are absent here, and ID 5 has no
# counterpart in df1.
df2 = pd.DataFrame(dict(
    Employee_ID=[1, 2, 5],
    Salary=[50000, 60000, 70000],
    Location=['New York', 'Los Angeles', 'Chicago'],
))

# Display the DataFrames (heading and table on separate lines).
print("DataFrame 1:", df1, sep="\n")
print("\nDataFrame 2:", df2, sep="\n")

# Merge the DataFrames on 'Employee_ID' (common column). An inner join keeps
# only the IDs present in both frames.
merged_df = df1.merge(df2, on='Employee_ID', how='inner')

print("\nMerged DataFrame (Inner Join):", merged_df, sep="\n")


DataFrame 1:
   Employee_ID     Name   Department
0            1    Alice           HR
1            2      Bob  Engineering
2            3  Charlie      Finance
3            4    David    Marketing

DataFrame 2:
   Employee_ID  Salary     Location
0            1   50000     New York
1            2   60000  Los Angeles
2            5   70000      Chicago

Merged DataFrame (Inner Join):
   Employee_ID   Name   Department  Salary     Location
0            1  Alice           HR   50000     New York
1            2    Bob  Engineering   60000  Los Angeles
