In [None]:
import pandas as pd

# Small in-memory sample: one row per student.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [24, 27, 22],
    'City': ['New York', 'San Diego', 'Seattle'],
    'Marks': [85, 90, 78],
}
df = pd.DataFrame(data)

# Boolean-mask selection: keep only the rows whose Marks exceed 80.
high_marks = df.loc[df['Marks'].gt(80)]
print("Rows where Marks > 80:", high_marks, sep="\n")

# Selecting a single column label yields a Series rather than a DataFrame.
city_series = df['City']
print("\nCity column as a series:", city_series, sep="\n")

# Persist the frame without the positional index so it is not written as a column.
df.to_csv('example.csv', index=False)

Rows where Marks > 80:
    Name  Age       City  Marks
0  Alice   24   New York     85
1    Bob   27  San Diego     90

City column as a series:
0     New York
1    San Diego
2      Seattle
Name: City, dtype: object


In [None]:
# Load the CSV from the working directory; this cell expects 'example.csv' to exist.
df_csv = pd.read_csv('example.csv')

# Preview: head(5) is the same as the default head().
print("\nFirst 5 rows of the CSV file:", df_csv.head(5), sep="\n")

# describe() summarises the numeric columns (count, mean, std, quartiles, min/max).
print("\nSummary statistics of the DataFrame:", df_csv.describe(), sep="\n")

# Per-column count of missing entries (isnull is an alias of isna).
missing_values = df_csv.isnull().sum()
print("\nMissing values in each column:", missing_values, sep="\n")


First 5 rows of the CSV file:
      Name  Age       City  Marks
0    Alice   24   New York     85
1      Bob   27  San Diego     90
2  Charlie   22    Seattle     78

Summary statistics of the DataFrame:
             Age      Marks
count   3.000000   3.000000
mean   24.333333  84.333333
std     2.516611   6.027714
min    22.000000  78.000000
25%    23.000000  81.500000
50%    24.000000  85.000000
75%    25.500000  87.500000
max    27.000000  90.000000

Missing values in each column:
Name     0
Age      0
City     0
Marks    0
dtype: int64


In [None]:
# Toy dataset with gaps: two ages and two salaries are missing (None -> NaN).
data_cleaning = {
    'ID': [101, 102, 103, 104],
    'Name': ['John', 'Emily', 'Sarah', 'Michael'],
    'Age': [25, None, 30, None],
    'Salary': [50000, 60000, None, None]
}

df_clean = pd.DataFrame(data_cleaning)

# Mean of the known ages; Series.mean() skips NaN entries by default.
average_age = df_clean['Age'].mean()

# Fill both columns in one DataFrame-level call using a column -> value mapping
# and reassign the result. Calling fillna(..., inplace=True) on df_clean[col]
# operates on an intermediate Series: pandas 2.x emits a FutureWarning for it
# and under pandas 3.0 the fill no longer reaches df_clean at all.
df_clean = df_clean.fillna({'Age': average_age, 'Salary': 0})

# Drop rows in which every column is missing (none remain after the fills above,
# so this is a safeguard); reassignment instead of inplace keeps the cell
# idempotent on re-run.
df_clean = df_clean.dropna(how='all')

print("\nCleaned DataFrame:")
print(df_clean)


Cleaned DataFrame:
    ID     Name   Age   Salary
0  101     John  25.0  50000.0
1  102    Emily  27.5  60000.0
2  103    Sarah  30.0      0.0
3  104  Michael  27.5      0.0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_clean['Age'].fillna(average_age, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_clean['Salary'].fillna(0, inplace=True)


In [None]:
# Sample payroll data: one row per employee, tagged with a department.
data_grouping = {
    'Department': ['HR', 'IT', 'HR', 'IT', 'HR'],
    'Employee': ['John', 'Alice', 'Sarah', 'Bob', 'Emily'],
    'Salary': [50000, 80000, 55000, 70000, 60000],
}
df_group = pd.DataFrame(data_grouping)

# Group once and reuse the grouped Salary column for both aggregations.
salary_by_department = df_group.groupby('Department')['Salary']

# reset_index(name=...) turns the aggregated Series back into a two-column
# frame and names the value column in the same step.
total_salary = salary_by_department.sum().reset_index(name='Total_Salary')
print("\nTotal salary per department:", total_salary, sep="\n")

average_salary = salary_by_department.mean().reset_index(name='Average_Salary')
print("\nAverage salary per department:", average_salary, sep="\n")


Total salary per department:
  Department  Total_Salary
0         HR        165000
1         IT        150000

Average salary per department:
  Department  Average_Salary
0         HR         55000.0
1         IT         75000.0
