In [None]:
import numpy as np
import pandas as pd

## 1. Creating a DataFrame

In [None]:
# Four equivalent ways to build the same three-row DataFrame
# (pandas is already imported as `pd` in the first cell).

# 1. Creating a DataFrame from a dictionary of lists
#    Each key becomes a column; each list holds that column's values.
data_dict = dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[25, 30, 35],
    City=["New York", "Los Angeles", "Chicago"],
)
df_dict = pd.DataFrame(data_dict)
print("DataFrame from Dictionary of Lists:\n", df_dict)

# 2. Creating a DataFrame from a list of dictionaries
#    Each dictionary is one row, keyed by column name.
data_list_dict = [
    {"Name": person, "Age": years, "City": town}
    for person, years, town in (
        ("Alice", 25, "New York"),
        ("Bob", 30, "Los Angeles"),
        ("Charlie", 35, "Chicago"),
    )
]
df_list_dict = pd.DataFrame(data_list_dict)
print("\nDataFrame from List of Dictionaries:\n", df_list_dict)

# 3. Creating a DataFrame from a list of lists with column names
#    Rows carry no labels of their own, so the column names are passed separately.
columns = ["Name", "Age", "City"]
data_list = [
    ["Alice", 25, "New York"],
    ["Bob", 30, "Los Angeles"],
    ["Charlie", 35, "Chicago"],
]
df_list = pd.DataFrame(data=data_list, columns=columns)
print("\nDataFrame from List of Lists:\n", df_list)

# 4. Creating a DataFrame from a dictionary of Series
#    Same layout as (1), but every column is wrapped in a pd.Series first.
data_series = {
    label: pd.Series(values)
    for label, values in (
        ("Name", ["Alice", "Bob", "Charlie"]),
        ("Age", [25, 30, 35]),
        ("City", ["New York", "Los Angeles", "Chicago"]),
    )
}
df_series = pd.DataFrame(data_series)
print("\nDataFrame from Dictionary of Series:\n", df_series)


DataFrame from Dictionary of Lists:
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago

DataFrame from List of Dictionaries:
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago

DataFrame from List of Lists:
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago

DataFrame from Dictionary of Series:
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


## 2. Exploring the DataFrame


In [None]:
# Quick structural and statistical overview of `df_dict` (built in the previous cell).

# DataFrame.info() writes its report directly to stdout and returns None,
# so it is called bare: wrapping it in print() would append a stray "None".
print("\nBasic Information:")
df_dict.info()

# describe() returns a DataFrame of summary statistics for the numeric
# columns (only "Age" here), so its result does need to be printed.
print("\nSummary Statistics:")
print(df_dict.describe())


Unnamed: 0,Age
count,3.0
mean,30.0
std,5.0
min,25.0
25%,27.5
50%,30.0
75%,32.5
max,35.0


## 3. Indexing, selection, and common DataFrame operations

In [None]:
# Walkthrough of everyday DataFrame operations on a small five-row frame.
# Every step that is not an explicit column assignment returns a NEW object,
# leaving `df` untouched; the two steps that do modify `df` are flagged below.

# 1. Creating a DataFrame from a dictionary of lists
data_dict = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eva"],
    "Age": [25, 30, 35, 40, 22],
    "City": ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix"],
    "Score": [85, 90, 95, 88, 76]
}
df = pd.DataFrame(data_dict)
print("Original DataFrame:\n", df)

# 2. Indexing and Selecting Data
print("\nSelecting a Single Column ('Name'):")
print(df["Name"])  # a single label returns a Series

print("\nSelecting Multiple Columns ('Name' and 'City'):")
print(df[["Name", "City"]])  # a list of labels returns a DataFrame

print("\nSelecting Rows by Index using .loc (label-based):")
print(df.loc[1:3])  # Rows from index 1 to 3 (inclusive)

print("\nSelecting Rows by Index using .iloc (position-based):")
print(df.iloc[1:3])  # Rows from position 1 to 2 (exclusive of 3)

print("\nSelecting Specific Rows and Columns (Name and Age for rows 1 and 3):")
print(df.loc[[1, 3], ["Name", "Age"]])

# 3. Renaming Columns and Indexes
print("\nRenaming Columns ('Name' -> 'Full Name'):")
df_renamed = df.rename(columns={"Name": "Full Name"})
print(df_renamed)

print("\nRenaming Indexes (0 -> 'A', 1 -> 'B'):")
df_renamed_idx = df.rename(index={0: "A", 1: "B"})
print(df_renamed_idx)

# 4. Adding and Dropping Columns
# NOTE: this assignment modifies `df` itself; every later step sees "Passed".
print("\nAdding a New Column ('Passed'):")
df["Passed"] = df["Score"] > 80
print(df)

print("\nDropping a Column ('City'):")
df_dropped_col = df.drop(columns=["City"])
print(df_dropped_col)

# 5. Dropping Rows
print("\nDropping a Row (index 2):")
df_dropped_row = df.drop(index=2)
print(df_dropped_row)

# 6. Filtering Data
print("\nFiltering Rows where 'Age' > 30:")
filtered_df = df[df["Age"] > 30]
print(filtered_df)

print("\nFiltering Rows where 'City' is 'New York':")
filtered_city_df = df[df["City"] == "New York"]
print(filtered_city_df)

# 7. Sorting Data
print("\nSorting by 'Age' (Ascending):")
sorted_df = df.sort_values(by="Age")
print(sorted_df)

print("\nSorting by 'Score' (Descending):")
sorted_df_desc = df.sort_values(by="Score", ascending=False)
print(sorted_df_desc)

# 8. Handling Missing Data
# Work on a copy so the injected missing value does not leak into `df`.
df_with_nan = df.copy()
df_with_nan.loc[1, "City"] = None
print("\nDataFrame with Missing Data:\n", df_with_nan)

print("\nDropping Rows with Missing Data:")
print(df_with_nan.dropna())

print("\nFilling Missing Data:")
print(df_with_nan.fillna("Unknown"))

# 9. Aggregations and Grouping
# Restrict the aggregation to the numeric columns named in the heading.
# Without this selection mean() is also attempted on the string columns
# ("Name", "City"), which raises TypeError on pandas >= 2.0.
print("\nGrouped by 'Passed' (Mean of 'Age' and 'Score'):")
grouped_df = df.groupby("Passed")[["Age", "Score"]].mean()
print(grouped_df)

# 10. Resetting and Setting Index
print("\nResetting the Index:")
reset_df = df.reset_index(drop=True)
print(reset_df)

print("\nSetting 'Name' as the Index:")
indexed_df = df.set_index("Name")
print(indexed_df)

print("\nAccessing a Row by Index Name ('Alice'):")
print(indexed_df.loc["Alice"])

# 11. String Operations
# NOTE: this overwrites df["City"]; the steps below see upper-cased city names.
print("\nConverting 'City' to Uppercase:")
df["City"] = df["City"].str.upper()
print(df)

# 12. Combining DataFrames
df2 = pd.DataFrame({
    "Name": ["Frank", "Grace"],
    "Age": [28, 24],
    "City": ["Dallas", "Austin"],
    "Score": [83, 89]
})
print("\nConcatenating Two DataFrames:")
# df2 has no "Passed" column. Derive it with the same rule used for `df` so the
# combined frame has no NaN in "Passed" and the column keeps its boolean dtype.
# assign() returns a new frame, so df2 itself is left unchanged.
combined_df = pd.concat(
    [df, df2.assign(Passed=df2["Score"] > 80)],
    ignore_index=True,
)
print(combined_df)

# 13. Saving and Loading DataFrames
# index=False keeps the row index out of the file, so reading it back does not
# produce an extra "Unnamed: 0" column.
df.to_csv("example.csv", index=False)
print("\nDataFrame saved to 'example.csv'.")

loaded_df = pd.read_csv("example.csv")
print("\nLoaded DataFrame from CSV:\n", loaded_df)
