### 1. Create DataFrame from Dictionary:

Create a DataFrame from a dictionary of lists.

In [1]:
import pandas as pd

data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [24, 27, 22],
    'City': ['New York', 'Los Angeles', 'Chicago']
}
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Name,Age,City
0,Alice,24,New York
1,Bob,27,Los Angeles
2,Charlie,22,Chicago


### 2. Select Rows Based on Conditions:

Select rows where age is greater than 25.

In [2]:
result = df[df['Age'] > 25]
print(result)

  Name  Age         City
1  Bob   27  Los Angeles


### 3. Add and Delete Columns:

Add a new column 'Salary' and delete the 'City' column.

In [3]:
df['Salary'] = [70000, 80000, 60000]
df.drop('City', axis=1, inplace=True)
print(df)

      Name  Age  Salary
0    Alice   24   70000
1      Bob   27   80000
2  Charlie   22   60000


### 4. Group By and Aggregate:

Group the DataFrame by 'City' and calculate the mean age.

In [4]:
df = pd.DataFrame(data)
result = df.groupby('City')['Age'].mean()
print(result)

City
Chicago        22
Los Angeles    27
New York       24
Name: Age, dtype: int64


### 5. Handling Missing Data:

Identify missing values, fill them with the mean value, and drop rows with missing values.

In [5]:
df.loc[1, 'Age'] = None
print(df.isnull())
df['Age'].fillna(df['Age'].mean(), inplace=True)
print(df)
df.dropna(inplace=True)
print(df)

    Name    Age   City
0  False  False  False
1  False   True  False
2  False  False  False
      Name   Age         City
0    Alice  24.0     New York
1      Bob  23.0  Los Angeles
2  Charlie  22.0      Chicago
      Name   Age         City
0    Alice  24.0     New York
1      Bob  23.0  Los Angeles
2  Charlie  22.0      Chicago


### 6. Merge DataFrames:

Merge two DataFrames on a common column.

In [6]:
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [1, 2, 4], 'Age': [24, 27, 22]})
merged_df = pd.merge(df1, df2, on='ID')
print(merged_df)


   ID   Name  Age
0   1  Alice   24
1   2    Bob   27


### 7. Pivot Table:

Create a pivot table summarizing the data.

In [7]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Alice'],
    'Age': [24, 27, 22, 24],
    'Score': [85, 90, 95, 80]
}
df = pd.DataFrame(data)
pivot_table = df.pivot_table(values='Score', index='Name', columns='Age', aggfunc='mean')
print(pivot_table)

Age        22    24    27
Name                     
Alice     NaN  82.5   NaN
Bob       NaN   NaN  90.0
Charlie  95.0   NaN   NaN


### 8. Date and Time Handling:

Convert a column to datetime, extract year, month, and day, and filter data by date.

In [8]:
df = pd.DataFrame({'Date': ['2023-01-01', '2023-02-01', '2023-03-01'], 'Value': [100, 200, 300]})
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
filtered_df = df[df['Date'] > '2023-01-31']
print(filtered_df)

        Date  Value  Year  Month  Day
1 2023-02-01    200  2023      2    1
2 2023-03-01    300  2023      3    1


### 9. Reshape DataFrame:

Reshape the DataFrame using melt and pivot.

In [9]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Math': [85, 90, 95],
    'Science': [80, 85, 90]
})
melted_df = pd.melt(df, id_vars='Name', var_name='Subject', value_name='Score')
print(melted_df)
pivoted_df = melted_df.pivot(index='Name', columns='Subject', values='Score')
print(pivoted_df)

      Name  Subject  Score
0    Alice     Math     85
1      Bob     Math     90
2  Charlie     Math     95
3    Alice  Science     80
4      Bob  Science     85
5  Charlie  Science     90
Subject  Math  Science
Name                  
Alice      85       80
Bob        90       85
Charlie    95       90


### 10. Iterate Over DataFrame:

Iterate over rows and columns of a DataFrame.

In [10]:
for index, row in df.iterrows():
    print(index, row['Name'], row['Math'])

for column in df:
    print(column)
    print(df[column])


0 Alice 85
1 Bob 90
2 Charlie 95
Name
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
Math
0    85
1    90
2    95
Name: Math, dtype: int64
Science
0    80
1    85
2    90
Name: Science, dtype: int64


### 11. Read and Write CSV:

Read data from a CSV file and write data to a CSV file.

In [11]:
data_1 = pd.read_csv('sample.csv')
print(df)
data_1.to_csv('output.csv', index=False)

      Name  Math  Science
0    Alice    85       80
1      Bob    90       85
2  Charlie    95       90


### 12. Change Data Types:

Convert data types of columns.

In [15]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [24, 27, 22],
    'City': ['New York', 'Los Angeles', 'Chicago']
}
df = pd.DataFrame(data)
df['Age'] = df['Age'].astype(float)
print(df.dtypes)

Name     object
Age     float64
City     object
dtype: object


### 13. Sort DataFrame:

Sort the DataFrame by one or more columns.

In [16]:
df = pd.DataFrame({'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [24, 27, 22]})
sorted_df = df.sort_values(by='Age')
print(sorted_df)

      Name  Age
2  Charlie   22
0    Alice   24
1      Bob   27


### 14. Filter and Select Data:

Use loc and iloc for filtering and selecting data.

In [17]:
df = pd.DataFrame({'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [24, 27, 22]})
print(df.loc[df['Age'] > 23])
print(df.iloc[0:2])

    Name  Age
0  Alice   24
1    Bob   27
    Name  Age
0  Alice   24
1    Bob   27


### 15. Merge and Join DataFrames:

Perform different types of merges and joins (inner, outer, left, right).

In [18]:
df1 = pd.DataFrame({'ID': [1, 2], 'Name': ['Alice', 'Bob']})
df2 = pd.DataFrame({'ID': [2, 3], 'Age': [27, 22]})
merged_df_inner = pd.merge(df1, df2, on='ID', how='inner')
merged_df_outer = pd.merge(df1, df2, on='ID', how='outer')
print(merged_df_inner)
print(merged_df_outer)

   ID Name  Age
0   2  Bob   27
   ID   Name   Age
0   1  Alice   NaN
1   2    Bob  27.0
2   3    NaN  22.0


### 16. Crosstab:

Create a crosstab to summarize data.

In [19]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Alice'],
    'Age': [24, 27, 22, 24],
    'Score': [85, 90, 95, 80]
}
df = pd.DataFrame(data)
crosstab = pd.crosstab(df['Name'], df['Score'])
print(crosstab)

Score    80  85  90  95
Name                   
Alice     1   1   0   0
Bob       0   0   1   0
Charlie   0   0   0   1


### 17. Apply Functions:

Apply custom functions to DataFrame columns using apply and applymap.

In [20]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [10, 20, 30]})
df['A_squared'] = df['A'].apply(lambda x: x**2)
df['B_doubled'] = df['B'].apply(lambda x: x*2)
print(df)

   A   B  A_squared  B_doubled
0  1  10          1         20
1  2  20          4         40
2  3  30          9         60


### 18. Remove Duplicates:

Identify and remove duplicate rows from a DataFrame.

In [21]:
data = {'Name': ['Alice', 'Bob', 'Alice'], 'Age': [24, 27, 24]}
df = pd.DataFrame(data)
df_no_duplicates = df.drop_duplicates()
print(df_no_duplicates)

    Name  Age
0  Alice   24
1    Bob   27


### 19. Handling Missing Data:

Replace missing values with a specific value or method (e.g., forward fill, backward fill).

In [23]:
import numpy as np
df = pd.DataFrame({'A': [1, np.nan, 3], 'B': [4, 5, np.nan]})
df.fillna(0, inplace=True)
df['B'].fillna(method='ffill', inplace=True)
print(df)

     A    B
0  1.0  4.0
1  0.0  5.0
2  3.0  0.0


### 20. Descriptive Statistics:

Calculate descriptive statistics like mean, median, standard deviation, and describe the DataFrame.

In [24]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
mean_A = df['A'].mean()
median_B = df['B'].median()
std_A = df['A'].std()
desc_stats = df.describe()
print(f"Mean of A: {mean_A}, Median of B: {median_B}, Std Dev of A: {std_A}")
print(desc_stats)

Mean of A: 2.0, Median of B: 5.0, Std Dev of A: 1.0
         A    B
count  3.0  3.0
mean   2.0  5.0
std    1.0  1.0
min    1.0  4.0
25%    1.5  4.5
50%    2.0  5.0
75%    2.5  5.5
max    3.0  6.0
