In [1]:
# Simple DataFrame Operations Project (Beginner Version)
#
# A guided tour of everyday pandas operations on a tiny hand-made table:
# build a DataFrame, select / filter / sort it, introduce and then repair
# missing values, save the cleaned table to CSV, and finish with a
# group-by summary.

# 1. Import Pandas
import pandas as pd  # pandas helps us work with tables (like Excel)

# 2. Create a sample dataset as a dictionary
# Each key becomes a column name and each list holds that column's values,
# so all lists must have the same length (one entry per row).
data = {
    'Name': ['John', 'Alice', 'Bob', 'Charlie', 'David'],
    'Age': [23, 30, 25, 35, 28],
    'City': ['New York', 'Los Angeles', 'Chicago', 'San Francisco', 'New York'],
    'Salary': [50000, 70000, 45000, 80000, 55000]
}

# 3. Convert the dictionary into a DataFrame (table)
df = pd.DataFrame(data)

# 4. Print the whole DataFrame
# print() always returns None, so its result is not stored in a variable.
print("\nFull DataFrame:\n", df)

# 5. Basic operations:

# a. Show Name and Salary columns only
# A list of column names inside df[...] selects several columns at once.
print("\nName and Salary columns:\n", df[['Name', 'Salary']])

# b. Show only the first 3 rows
print("\nFirst 3 rows:\n", df.head(3))

# c. Calculate the average salary
# This is computed on the complete data and reused in step 7b below.
average_salary = df['Salary'].mean()
print("\nAverage Salary:", average_salary)

# d. Show only people aged 30 or more
# df['Age'] >= 30 produces a True/False value per row; df[...] keeps the True rows.
filtered_df = df[df['Age'] >= 30]
print("\nPeople aged 30 or more:\n", filtered_df)

# 6. Sorting:
# sort_values returns a new, sorted table; df itself is left unchanged.

# Sort the table by Salary from highest to lowest
sorted_by_salary = df.sort_values(by='Salary', ascending=False)
print("\nSorted by Salary (Descending):\n", sorted_by_salary)

# Sort by City then Age (both from A-Z)
# Rows are ordered by City first; Age only breaks ties within the same city.
sorted_by_city_age = df.sort_values(by=['City', 'Age'], ascending=True)
print("\nSorted by City and Age:\n", sorted_by_city_age)

# 7. Add missing data (None means missing)
# Salary starts out as a whole-number (int64) column, which cannot hold a
# missing value. Convert it to float64 first, so that assigning None does not
# depend on pandas silently changing the column type (deprecated behaviour
# that triggers a FutureWarning in recent pandas versions).
df['Salary'] = df['Salary'].astype('float64')
df.loc[2, 'Salary'] = None  # Bob has missing salary
df.loc[4, 'City'] = None  # David has missing city

# a. Show DataFrame with missing values
print("\nWith missing data:\n", df)

# b. Fill missing Salary with the average
# NOTE: average_salary was calculated in step 5c, before Bob's salary was
# removed, so it still includes his original value.
df['Salary'] = df['Salary'].fillna(average_salary)
print("\nFilled missing Salary:\n", df)

# c. Fill missing City with 'Unknown'
df['City'] = df['City'].fillna('Unknown')
print("\nFilled missing City:\n", df)

# d. Drop rows that have missing values (just in case)
# Both gaps were filled above, so no rows are expected to be removed here.
df_dropped = df.dropna()
print("\nDropped missing rows:\n", df_dropped)

# 8. Save the clean table to a file (like Excel but CSV)
# index=False leaves the row numbers (0, 1, 2, ...) out of the file.
df.to_csv('Cleaned_data.csv', index=False)
print("\nData saved to 'Cleaned_data.csv'")

# BONUS: Show average salary per city
# groupby('City') gathers rows with the same city; mean() averages each group.
grouped = df.groupby('City')['Salary'].mean()
print("\nAverage Salary by City:\n", grouped)


Full DataFrame:
       Name  Age           City  Salary
0     John   23       New York   50000
1    Alice   30    Los Angeles   70000
2      Bob   25        Chicago   45000
3  Charlie   35  San Francisco   80000
4    David   28       New York   55000

Name and Salary columns:
       Name  Salary
0     John   50000
1    Alice   70000
2      Bob   45000
3  Charlie   80000
4    David   55000

First 3 rows:
     Name  Age         City  Salary
0   John   23     New York   50000
1  Alice   30  Los Angeles   70000
2    Bob   25      Chicago   45000

Average Salary: 60000.0

People aged 30 or more:
       Name  Age           City  Salary
1    Alice   30    Los Angeles   70000
3  Charlie   35  San Francisco   80000

Sorted by Salary (Descending):
       Name  Age           City  Salary
3  Charlie   35  San Francisco   80000
1    Alice   30    Los Angeles   70000
4    David   28       New York   55000
0     John   23       New York   50000
2      Bob   25        Chicago   45000

Sorted by City 