## Cell 1: Code: import pandas as pd


In [2]:

import pandas as pd

# A list that mixes ints, a float and a string: no single numeric dtype fits
# every element, so the Series built from it reports dtype `object`.
data = [
    1,
    2.3,
    'a',
    4,
    5,
]
series_from_list = pd.Series(data=data)
print(series_from_list)


0      1
1    2.3
2      a
3      4
4      5
dtype: object


## Cell 2: Code: data frame

In [3]:
# Build a DataFrame from a column-oriented mapping: each key becomes a column
# name and each list supplies that column's values, row by row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[23, 25, 22, 24],
    Score=[85, 90, 78, 92],
)

df = pd.DataFrame(data=data)
print(df)


      Name  Age  Score
0    Alice   23     85
1      Bob   25     90
2  Charlie   22     78
3    David   24     92


## Cell 3: Code


In [5]:
# Element-wise multiplication aligns the two Series on their index labels.
# The result carries the union of both indexes: labels present in both ("b", "c")
# hold the product, labels present in only one ("a", "d") hold NaN.
s1 = pd.Series(data=[1, 2, 3], index=list("abc"))
s2 = pd.Series(data=[4, 5, 6], index=list("bcd"))
print(s1.mul(s2))


a     NaN
b     8.0
c    15.0
d     NaN
dtype: float64


## Cell 4: code to give sum of 2 series


In [6]:

# Both Series share the default 0..2 integer index, so the addition pairs the
# values position by position.
series_a = pd.Series(data=[1, 2, 3])
series_b = pd.Series(data=[4, 5, 6])
sum_series = series_a.add(series_b)
print(sum_series)


0    5
1    7
2    9
dtype: int64


## Cell 5

In [7]:
# Hierarchical (Alphabet, Subject) index built from two parallel label lists:
# position i of each list together forms the label of row i.
arrays = [
    list("AABB"),
    ['Math', 'Science'] * 2,
]
index = pd.MultiIndex.from_arrays(arrays, names=['Alphabet', 'Subject'])

multi_s = pd.Series(data=[90, 85, 88, 92], index=index)
print(multi_s)


Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64


## Cell 6


In [9]:
# Same (Alphabet, Subject) Series as before, but the index is assembled from
# explicit (letter, subject) tuples, one tuple per row.
import pandas as pd

tuples = [
    (letter, subject)
    for letter in ('A', 'B')
    for subject in ('Math', 'Science')
]
index = pd.MultiIndex.from_tuples(tuples, names=['Alphabet', 'Subject'])

multi_s = pd.Series(data=[90, 85, 88, 92], index=index)
print(multi_s)


Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64


## Cell 7


In [10]:
# from_product pairs every Alphabet label with every Subject label, producing
# the four (Alphabet, Subject) rows without listing them by hand.
index = pd.MultiIndex.from_product(
    [list("AB"), ['Math', 'Science']],
    names=['Alphabet', 'Subject'],
)

multi_s = pd.Series(data=[90, 85, 88, 92], index=index)
print(multi_s)


Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64


## Cell 8


In [11]:
# from_frame turns each row of a DataFrame into one index entry; the frame's
# columns (Alphabet, Subject) become the index levels.
df = pd.DataFrame(
    data=dict(
        Alphabet=['A', 'A', 'B', 'B'],
        Subject=['Math', 'Science', 'Math', 'Science'],
    )
)
index = pd.MultiIndex.from_frame(df, names=['Alphabet', 'Subject'])

multi_s = pd.Series(data=[90, 85, 88, 92], index=index)
print(multi_s)


Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64


## Cell 9: Group data and aggregate with groupby

**This code demonstrates multiple ways to create and work with a MultiIndex in pandas**, including building one from arrays, tuples, and a Cartesian product; accessing and slicing data; swapping and reordering levels; grouping and aligning results; taking cross-sections; sorting; and removing unused index levels.


In [14]:
import pandas as pd
import numpy as np

# Tour of common MultiIndex operations on small (Alphabet, Subject) data:
# construction, label access, slicing, level reordering, groupby alignment,
# cross-sections, sorting and pruning unused levels.

# ---- Reproducibility ----
# All random values in this cell come from one seeded generator, so the printed
# output is identical on every Restart & Run All.
rng = np.random.default_rng(42)

# ---- MultiIndex from arrays ----
# Two parallel lists: position i of each list forms the label of row i.
arrays = [
    ["A", "A", "B", "B"],
    ["Math", "Science", "Math", "Science"]
]
index = pd.MultiIndex.from_arrays(arrays, names=("Alphabet", "Subject"))
multi_s = pd.Series([90, 85, 88, 92], index=index)
print("MultiIndex Series from arrays:\n", multi_s, "\n")

# ---- MultiIndex from tuples ----
# One explicit (Alphabet, Subject) tuple per row.
tuples = [
    ("A", "Math"), ("A", "Science"),
    ("B", "Math"), ("B", "Science")
]
index2 = pd.MultiIndex.from_tuples(tuples, names=("Alphabet", "Subject"))
multi_s2 = pd.Series([70, 75, 80, 82], index=index2)
print("MultiIndex Series from tuples:\n", multi_s2, "\n")

# ---- MultiIndex from product ----
# Every Alphabet label paired with every Subject label; scores are random ints in [60, 100).
iterables = [["A", "B"], ["Math", "Science"]]
index3 = pd.MultiIndex.from_product(iterables, names=("Alphabet", "Subject"))
multi_s3 = pd.Series(rng.integers(60, 100, size=4), index=index3)
print("MultiIndex Series from product:\n", multi_s3, "\n")

# ---- Accessing data ----
# A first-level label returns the sub-Series below it; a full tuple returns one scalar.
print("Access all subjects for 'A':\n", multi_s.loc["A"], "\n")
print("Access specific element (B, Science):\n", multi_s.loc[("B", "Science")], "\n")

# ---- Slicing ----
# Label slices are inclusive on both ends; `[:, "Math"]` selects on the second level.
print("Slicing from A to B:\n", multi_s.loc["A":"B"], "\n")
print("Partial slice for all Math:\n", multi_s.loc[:, "Math"], "\n")

# ---- Reordering levels ----
print("Swapping levels:\n", multi_s.swaplevel(), "\n")
print("Reordering levels:\n", multi_s3.reorder_levels(["Subject", "Alphabet"]), "\n")

# ---- MultiIndex automatic construction ----
# Passing a list of label arrays straight to `index=` lets pandas build the
# MultiIndex itself; no explicit MultiIndex.from_* call is needed.
multi_s_auto = pd.Series(
    rng.standard_normal(4),
    index=[["A", "A", "B", "B"], ["X", "Y", "X", "Y"]]
)
print("MultiIndex Series constructed automatically:\n", multi_s_auto, "\n")

df_auto = pd.DataFrame(
    rng.standard_normal((4, 2)),
    index=[["Group1", "Group1", "Group2", "Group2"],
           ["One", "Two", "One", "Two"]],
    columns=["Score1", "Score2"]
)
print("DataFrame with MultiIndex automatically:\n", df_auto, "\n")

# ---- Groupby + reindex example ----
df = pd.DataFrame({
    "Math": [85, 90, 95, 80],
    "Science": [82, 88, 92, 84]
}, index=pd.MultiIndex.from_arrays([["A", "A", "B", "B"], ["one", "two", "one", "two"]]))
print("Original DataFrame:\n", df, "\n")

# One row of column means per first-level label.
mean_by_group = df.groupby(level=0).mean()
print("Mean by group:\n", mean_by_group, "\n")

# Reindexing on level 0 repeats each group's mean for every original row of that group.
aligned = mean_by_group.reindex(df.index, level=0)
print("Reindexed to align with original index:\n", aligned, "\n")

# ---- Cross-section ----
# Rows whose second-level label is "two"; that level is dropped from the result.
print("Cross-section for level 'two':\n", df.xs("two", level=1), "\n")

# ---- Sorting MultiIndex ----
# Reverse the rows instead of shuffling: a random shuffle can occasionally
# return the rows already in sorted order, which would make this demo a no-op.
unsorted = multi_s_auto.iloc[::-1]
print("Unsorted MultiIndex Series:\n", unsorted, "\n")
print("Sorted by index:\n", unsorted.sort_index(), "\n")

# MultiIndex columns + remove_unused_levels
arrays = [["Score", "Score"], ["Math", "Science"]]
multi_columns = pd.MultiIndex.from_arrays(arrays, names=["Type", "Subject"])

df_multi = pd.DataFrame(
    [[85, 82], [90, 88], [95, 92], [80, 84]],
    columns=multi_columns
)

print("DataFrame with MultiIndex columns:\n", df_multi, "\n")

# Keep only the Math column. Selecting with a *list* of keys returns a DataFrame
# whose columns are taken from the original MultiIndex, so the no-longer-used
# "Science" label is still stored in its levels. (Selecting the single column
# and calling .to_frame() would build a brand-new index with nothing to prune.)
sub_df = df_multi[[("Score", "Math")]]

# Print `.levels`: the plain MultiIndex repr only lists the labels in use and
# therefore looks identical before and after pruning.
print("Columns before removing unused levels:\n", sub_df.columns.levels, "\n")
print("Columns after remove_unused_levels:\n", sub_df.columns.remove_unused_levels().levels, "\n")


MultiIndex Series from arrays:
 Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64 

MultiIndex Series from tuples:
 Alphabet  Subject
A         Math       70
          Science    75
B         Math       80
          Science    82
dtype: int64 

MultiIndex Series from product:
 Alphabet  Subject
A         Math       99
          Science    87
B         Math       75
          Science    90
dtype: int64 

Access all subjects for 'A':
 Subject
Math       90
Science    85
dtype: int64 

Access specific element (B, Science):
 92 

Slicing from A to B:
 Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64 

Partial slice for all Math:
 Alphabet
A    90
B    88
dtype: int64 

Swapping levels:
 Subject  Alphabet
Math     A           90
Science  A           85
Math     B           88
Science  B           92
dtype: int64 

Reordering levels:
 Subj

## Cell 10


In [15]:
# Three ways to build the same (Alphabet, Subject) MultiIndex for a Series.
import pandas as pd
import numpy as np

# Seeded generator: the random scores in the last example are identical on every run.
rng = np.random.default_rng(42)

# from_arrays -> builds the MultiIndex from two parallel lists (one per level).
arrays = [["A", "A", "B", "B"], ["Math", "Science", "Math", "Science"]]
index = pd.MultiIndex.from_arrays(arrays, names=("Alphabet", "Subject"))
multi_s = pd.Series([90, 85, 88, 92], index=index)
print("MultiIndex Series from arrays:\n", multi_s, "\n")

# from_tuples -> builds the MultiIndex from explicit (Alphabet, Subject) pairs.
tuples = [("A", "Math"), ("A", "Science"), ("B", "Math"), ("B", "Science")]
index2 = pd.MultiIndex.from_tuples(tuples, names=("Alphabet", "Subject"))
multi_s2 = pd.Series([70, 75, 80, 82], index=index2)
print("MultiIndex Series from tuples:\n", multi_s2, "\n")

# from_product -> builds the MultiIndex from the Cartesian product of the iterables.
# Scores are random integers drawn from [60, 100).
iterables = [["A", "B"], ["Math", "Science"]]
index3 = pd.MultiIndex.from_product(iterables, names=("Alphabet", "Subject"))
multi_s3 = pd.Series(rng.integers(60, 100, size=4), index=index3)
print("MultiIndex Series from product:\n", multi_s3, "\n")


MultiIndex Series from arrays:
 Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64 

MultiIndex Series from tuples:
 Alphabet  Subject
A         Math       70
          Science    75
B         Math       80
          Science    82
dtype: int64 

MultiIndex Series from product:
 Alphabet  Subject
A         Math       78
          Science    93
B         Math       68
          Science    87
dtype: int64 



## Cell 11: Set DataFrame index



In [16]:
# Four ways to give a DataFrame a MultiIndex: from parallel arrays, from tuples,
# from a Cartesian product, and by promoting existing columns with set_index.
import pandas as pd
import numpy as np

# Seeded generator so the random cell values below are reproducible across runs.
rng = np.random.default_rng(42)

# Example 1: parallel label lists, one per index level.
arrays = [
    ["A", "A", "B", "B"],
    ["Math", "Science", "Math", "Science"]
]
index = pd.MultiIndex.from_arrays(arrays, names=("Alphabet", "Subject"))

df1 = pd.DataFrame(
    rng.integers(50, 100, size=(4, 2)),  # random ints in [50, 100)
    index=index,
    columns=["Score1", "Score2"]
)
print("Example 1: MultiIndex from arrays\n", df1, "\n")

# Example 2: one explicit (Alphabet, Subject) tuple per row.
tuples = [
    ("A", "Math"),
    ("A", "Science"),
    ("B", "Math"),
    ("B", "Science")
]
index2 = pd.MultiIndex.from_tuples(tuples, names=("Alphabet", "Subject"))

df2 = pd.DataFrame(
    rng.standard_normal((4, 2)),  # standard-normal floats
    index=index2,
    columns=["Value1", "Value2"]
)
print("Example 2: MultiIndex from tuples\n", df2, "\n")

# Example 3: every Group label paired with every Subject label.
index3 = pd.MultiIndex.from_product(
    [["Group1", "Group2"], ["Math", "Science"]],
    names=("Group", "Subject")
)

df3 = pd.DataFrame(
    rng.integers(1, 10, size=(4, 2)),  # random ints in [1, 10)
    index=index3,
    columns=["Col1", "Col2"]
)
print("Example 3: MultiIndex from product\n", df3, "\n")

# Example 4: start from ordinary columns and move two of them into the index.
data = {
    "Group": ["A", "A", "B", "B"],
    "Subject": ["Math", "Science", "Math", "Science"],
    "Score": [88, 92, 85, 90]
}

df4 = pd.DataFrame(data).set_index(["Group", "Subject"])  # both columns become index levels
print("Example 4: MultiIndex created from DataFrame columns\n", df4, "\n")


Example 1: MultiIndex from arrays
                   Score1  Score2
Alphabet Subject                
A        Math         62      66
         Science      55      68
B        Math         50      91
         Science      93      61 

Example 2: MultiIndex from tuples
                     Value1    Value2
Alphabet Subject                    
A        Math    -1.174428 -0.559026
         Science  0.676484  0.836867
B        Math     0.427847  0.321158
         Science  0.316807  0.466022 

Example 3: MultiIndex from product
                 Col1  Col2
Group  Subject            
Group1 Math        8     3
       Science     7     7
Group2 Math        7     6
       Science     3     8 

Example 4: MultiIndex created from DataFrame columns
                Score
Group Subject       
A     Math        88
      Science     92
B     Math        85
      Science     90 



## Cell 12: Code


In [17]:
# Label lookups on a two-level index: a first-level label returns the
# sub-Series beneath it, while a full (Alphabet, Subject) tuple returns a scalar.
import pandas as pd

arrays = [list("AABB"), ["Math", "Science"] * 2]
index = pd.MultiIndex.from_arrays(arrays, names=["Alphabet", "Subject"])
multi_s = pd.Series(data=[90, 85, 88, 92], index=index)

subjects_for_a = multi_s.loc["A"]
print("Access all subjects for 'A':\n", subjects_for_a, "\n")

b_science_score = multi_s.loc[("B", "Science")]
print("Access specific element (B, Science):\n", b_science_score, "\n")

Access all subjects for 'A':
 Subject
Math       90
Science    85
dtype: int64 

Access specific element (B, Science):
 92 



## Cell 13


In [18]:
# Slicing a MultiIndex Series: a label range on the first level (inclusive on
# both ends) and a selection of one second-level label across all first-level labels.
import pandas as pd

arrays = [list("AABB"), ["Math", "Science"] * 2]
index = pd.MultiIndex.from_arrays(arrays, names=["Alphabet", "Subject"])
multi_s = pd.Series(data=[90, 85, 88, 92], index=index)

a_through_b = multi_s.loc["A":"B"]
print("Slicing from A to B:\n", a_through_b, "\n")

math_only = multi_s.loc[:, "Math"]
print("Partial slice for all Math:\n", math_only, "\n")


Slicing from A to B:
 Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64 

Partial slice for all Math:
 Alphabet
A    90
B    88
dtype: int64 



## Cell 14


In [19]:
# Two ways to change level order on a MultiIndex Series: swaplevel() exchanges
# the two levels, reorder_levels() takes the full new order by name. Row order
# is left untouched by both.
import pandas as pd

arrays = [list("AABB"), ["Math", "Science"] * 2]
index = pd.MultiIndex.from_arrays(arrays, names=["Alphabet", "Subject"])
multi_s = pd.Series(data=[90, 85, 88, 92], index=index)

swapped = multi_s.swaplevel()
print("Swapping levels:\n", swapped, "\n")

reordered = multi_s.reorder_levels(["Subject", "Alphabet"])
print("Reordering levels:\n", reordered, "\n")


Swapping levels:
 Subject  Alphabet
Math     A           90
Science  A           85
Math     B           88
Science  B           92
dtype: int64 

Reordering levels:
 Subject  Alphabet
Math     A           90
Science  A           85
Math     B           88
Science  B           92
dtype: int64 



## Cell 15


In [20]:
# Automatic MultiIndex construction: when `index=` receives a list of label
# arrays, pandas builds the MultiIndex itself, with no explicit
# MultiIndex.from_* call.
import pandas as pd
import numpy as np

# Seeded generator so the random values printed below are reproducible across runs.
rng = np.random.default_rng(42)

# Series: two parallel label lists become the two index levels.
multi_s_auto = pd.Series(
    rng.standard_normal(4),
    index=[["A", "A", "B", "B"], ["X", "Y", "X", "Y"]]
)
print("MultiIndex Series constructed automatically:\n", multi_s_auto, "\n")

# DataFrame: the same shortcut applies to the row index.
df_auto = pd.DataFrame(
    rng.standard_normal((4, 2)),
    index=[["Group1", "Group1", "Group2", "Group2"],
           ["One", "Two", "One", "Two"]],
    columns=["Score1", "Score2"]
)
print("DataFrame with MultiIndex automatically:\n", df_auto, "\n")


MultiIndex Series constructed automatically:
 A  X    0.736201
   Y   -1.187706
B  X    0.535302
   Y   -0.476147
dtype: float64 

DataFrame with MultiIndex automatically:
               Score1    Score2
Group1 One -0.724699  0.804087
       Two -1.677721 -0.074029
Group2 One  0.318080  0.567285
       Two  0.233348 -0.657437 



## Cell 16



In [21]:
# Compute the per-group mean over the first index level, then broadcast each
# group's mean back onto every row of the original two-level index.
import pandas as pd

group_index = pd.MultiIndex.from_arrays([list("AABB"), ["one", "two"] * 2])
df = pd.DataFrame(
    data={
        "Math": [85, 90, 95, 80],
        "Science": [82, 88, 92, 84],
    },
    index=group_index,
)
print("Original DataFrame:\n", df, "\n")

# Collapses the second level: one row of column means per first-level label.
mean_by_group = df.groupby(level=0).mean()
print("Mean by group:\n", mean_by_group, "\n")

# Matching on level 0 repeats each group's mean row for all of its original labels.
aligned = mean_by_group.reindex(df.index, level=0)
print("Reindexed to align with original index:\n", aligned, "\n")


Original DataFrame:
        Math  Science
A one    85       82
  two    90       88
B one    95       92
  two    80       84 

Mean by group:
    Math  Science
A  87.5     85.0
B  87.5     88.0 

Reindexed to align with original index:
        Math  Science
A one  87.5     85.0
  two  87.5     85.0
B one  87.5     88.0
  two  87.5     88.0 



## Cell 17


In [22]:
# Sort an out-of-order MultiIndex Series, then show that slicing MultiIndex
# columns keeps the dropped labels in the index levels until
# remove_unused_levels() prunes them.
import pandas as pd
import numpy as np

# Seeded generator so the random values printed below are reproducible across runs.
rng = np.random.default_rng(42)

# The labels are deliberately listed out of lexical order (B, A, B, A).
multi_s = pd.Series(
    rng.standard_normal(4),
    index=pd.MultiIndex.from_arrays([["B", "A", "B", "A"], ["X", "Y", "Y", "X"]])
)
print("Unsorted MultiIndex Series:\n", multi_s, "\n")
print("Sorted by index:\n", multi_s.sort_index(), "\n")

df = pd.DataFrame(
    rng.standard_normal((4, 2)),
    index=pd.MultiIndex.from_arrays([["Group1", "Group1", "Group2", "Group2"], ["One", "Two", "One", "Two"]]),
    columns=pd.MultiIndex.from_arrays([["Score1", "Score2"], ["X", "Y"]])
)
sub_df = df[["Score1"]]  # drop Score2
# Inspect the slice itself: its levels still list "Score2" and "Y" even though
# no remaining column uses them.
print("Before removing unused levels:\n", sub_df.columns.levels, "\n")
print("After removing unused levels:\n", sub_df.columns.remove_unused_levels().levels, "\n")


Unsorted MultiIndex Series:
 B  X   -1.886428
A  Y    0.841972
B  Y    1.038333
A  X   -1.082248
dtype: float64 

Sorted by index:
 A  X   -1.082248
   Y    0.841972
B  X   -1.886428
   Y    1.038333
dtype: float64 

Before removing unused levels:
 [['Score1', 'Score2'], ['X', 'Y']] 

After removing unused levels:
 [['Score1'], ['X']] 



## Cell 18


In [23]:
# Three DataFrame constructors side by side: a dict of lists (default integer
# index), a dict of Series (rows aligned on the Series' labels), and a 2-D
# NumPy array with explicit column names.
import pandas as pd
import numpy as np

data1 = dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[24, 27, 22],
    Score=[85, 90, 88],
)
df1 = pd.DataFrame(data=data1)
print("DataFrame from dictionary of lists:\n", df1, "\n")

# Each Series carries its own labels; pandas lines the columns up on them.
data2 = {
    subject: pd.Series(scores, index=["Alice", "Bob", "Charlie"])
    for subject, scores in (("Math", [90, 80, 85]), ("Science", [88, 92, 84]))
}
df2 = pd.DataFrame(data=data2)
print("DataFrame from dictionary of Series:\n", df2, "\n")

# 0..8 laid out row-major as a 3x3 grid.
grid = np.arange(9).reshape((3, 3))
df3 = pd.DataFrame(grid, columns=["Col1", "Col2", "Col3"])
print("DataFrame from NumPy array:\n", df3, "\n")


DataFrame from dictionary of lists:
       Name  Age  Score
0    Alice   24     85
1      Bob   27     90
2  Charlie   22     88 

DataFrame from dictionary of Series:
          Math  Science
Alice      90       88
Bob        80       92
Charlie    85       84 

DataFrame from NumPy array:
    Col1  Col2  Col3
0     0     1     2
1     3     4     5
2     6     7     8 



## Cell 19


In [24]:
# Basic DataFrame access: one column (a Series), several columns (a DataFrame),
# a row by index label with .loc, and a row by integer position with .iloc.
import pandas as pd

data = dict(
    Name=["Alice", "Bob", "Charlie", "David"],
    Age=[24, 27, 22, 30],
    Score=[85, 90, 88, 95],
)
df = pd.DataFrame(data=data)

print("Original DataFrame:\n", df, "\n")

score_column = df["Score"]
print("Accessing single column (Score):\n", score_column, "\n")

name_and_age = df[["Name", "Age"]]
print("Accessing multiple columns:\n", name_and_age, "\n")

# With the default integer index, label 2 is also the third row.
row_labelled_2 = df.loc[2]
print("Access row using loc:\n", row_labelled_2, "\n")

row_at_position_1 = df.iloc[1]
print("Access row using iloc:\n", row_at_position_1, "\n")

Original DataFrame:
       Name  Age  Score
0    Alice   24     85
1      Bob   27     90
2  Charlie   22     88
3    David   30     95 

Accessing single column (Score):
 0    85
1    90
2    88
3    95
Name: Score, dtype: int64 

Accessing multiple columns:
       Name  Age
0    Alice   24
1      Bob   27
2  Charlie   22
3    David   30 

Access row using loc:
 Name     Charlie
Age           22
Score         88
Name: 2, dtype: object 

Access row using iloc:
 Name     Bob
Age       27
Score     90
Name: 1, dtype: object 



## Cell 20

In [25]:
# Row slicing, column selection and boolean filtering on a small DataFrame.
import pandas as pd

data = dict(
    Name=["Alice", "Bob", "Charlie", "David", "Eva"],
    Age=[24, 27, 22, 30, 28],
    Score=[85, 90, 88, 95, 89],
)
df = pd.DataFrame(data=data)

print("Original DataFrame:\n", df, "\n")

# Positional slice: rows 0, 1 and 2 (end excluded).
first_three = df.iloc[:3]
print("First three rows:\n", first_three, "\n")

# Label slice with .loc includes both endpoints: labels 1, 2 and 3.
rows_1_to_3 = df.loc[1:3]
print("Rows 1 to 3:\n", rows_1_to_3, "\n")

name_and_score = df[["Name", "Score"]]
print("Columns Name and Score:\n", name_and_score, "\n")

# Boolean mask: keep only the rows whose Score exceeds 88.
high_scorers = df.loc[df["Score"].gt(88)]
print("Rows where Score > 88:\n", high_scorers, "\n")


Original DataFrame:
       Name  Age  Score
0    Alice   24     85
1      Bob   27     90
2  Charlie   22     88
3    David   30     95
4      Eva   28     89 

First three rows:
       Name  Age  Score
0    Alice   24     85
1      Bob   27     90
2  Charlie   22     88 

Rows 1 to 3:
       Name  Age  Score
1      Bob   27     90
2  Charlie   22     88
3    David   30     95 

Columns Name and Score:
       Name  Score
0    Alice     85
1      Bob     90
2  Charlie     88
3    David     95
4      Eva     89 

Rows where Score > 88:
     Name  Age  Score
1    Bob   27     90
3  David   30     95
4    Eva   28     89 



## Cell 21

In [26]:
# Walks one small DataFrame through four edits, printing after each step:
# add a column, overwrite one cell, drop a column, drop a row.
import pandas as pd

df = pd.DataFrame(
    data=dict(
        Name=["Alice", "Bob", "Charlie"],
        Age=[24, 27, 22],
    )
)
print("Original DataFrame:\n", df, "\n")

# New column appended on the right.
df = df.assign(Score=[85, 90, 88])
print("After adding Score column:\n", df, "\n")

# Single-cell update addressed by (row label, column name).
df.loc[1, "Age"] = 28
print("After updating Age of Bob:\n", df, "\n")

df = df.drop(columns="Score")
print("After deleting Score column:\n", df, "\n")

# Removes the row labelled 2; the remaining labels are not renumbered.
df = df.drop(index=2)
print("After deleting row with index 2:\n", df, "\n")

Original DataFrame:
       Name  Age
0    Alice   24
1      Bob   27
2  Charlie   22 

After adding Score column:
       Name  Age  Score
0    Alice   24     85
1      Bob   27     90
2  Charlie   22     88 

After updating Age of Bob:
       Name  Age  Score
0    Alice   24     85
1      Bob   28     90
2  Charlie   22     88 

After deleting Score column:
       Name  Age
0    Alice   24
1      Bob   28
2  Charlie   22 

After deleting row with index 2:
     Name  Age
0  Alice   24
1    Bob   28 



## Cell 22:other pandas capabilities

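This cell shows a set of useful pandas commands (head, info, describe, handling missing values, groupby, datetime parsing, value_counts). It uses your existing `df` if one is already defined; otherwise it creates a sample DataFrame for demonstration. Each step is skipped when the column it needs (`Value`, `Category`, or `Date`) is not present.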

In [1]:
import pandas as pd
import numpy as np

# Reuse an existing `df` when one is already defined; otherwise build a small
# demo frame that has one missing entry in every column.
try:
    df  # noqa: F821
except NameError:
    df = pd.DataFrame(
        dict(
            Category=['A', 'B', 'A', 'C', None],
            Value=[10, 20, None, 40, 50],
            Date=['2020-01-01', '2020-01-02', '2020-02-01', None, '2020-02-03'],
        )
    )
    print('Created sample DataFrame "df" for demo purposes.')

# Structural overview: first rows, dtypes / non-null counts, summary statistics.
print("\n--- df.head() ---")
display(df.head())

print("\n--- df.info() ---")
df.info()

print("\n--- df.describe() ---")
display(df.describe(include="all"))

print("\n--- Missing values per column ---")
missing_per_column = df.isna().sum()
print(missing_per_column)

# Median-impute `Value` into a new column, only when the column exists and has gaps.
if "Value" in df.columns and df["Value"].isna().any():
    value_median = df["Value"].median()
    df["Value_filled"] = df["Value"].fillna(value_median)
    display(df[["Value", "Value_filled"]].head())

# Mean of `Value` per `Category`; dropna=False keeps rows with a missing
# category as a group of their own.
if "Category" in df.columns and "Value" in df.columns:
    mean_value_by_category = df.groupby("Category", dropna=False).agg({"Value": "mean"})
    display(mean_value_by_category)

# Parse `Date` into datetimes (errors='coerce' turns unparseable entries into NaT)
# and report the earliest and latest parsed values.
if "Date" in df.columns:
    df["Date_parsed"] = pd.to_datetime(df["Date"], errors="coerce")
    display(df[["Date", "Date_parsed"]].head())
    earliest, latest = df["Date_parsed"].min(), df["Date_parsed"].max()
    print("Date range:", earliest, "to", latest)

# Frequency of each category, counting missing values as well.
if "Category" in df.columns:
    print("\nCategory value counts:")
    category_counts = df["Category"].value_counts(dropna=False)
    print(category_counts)



Created sample DataFrame "df" for demo purposes.

--- df.head() ---


Unnamed: 0,Category,Value,Date
0,A,10.0,2020-01-01
1,B,20.0,2020-01-02
2,A,,2020-02-01
3,C,40.0,
4,,50.0,2020-02-03



--- df.info() ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Category  4 non-null      object 
 1   Value     4 non-null      float64
 2   Date      4 non-null      object 
dtypes: float64(1), object(2)
memory usage: 252.0+ bytes

--- df.describe() ---


Unnamed: 0,Category,Value,Date
count,4,4.0,4
unique,3,,4
top,A,,2020-01-01
freq,2,,1
mean,,30.0,
std,,18.257419,
min,,10.0,
25%,,17.5,
50%,,30.0,
75%,,42.5,



--- Missing values per column ---
Category    1
Value       1
Date        1
dtype: int64


Unnamed: 0,Value,Value_filled
0,10.0,10.0
1,20.0,20.0
2,,30.0
3,40.0,40.0
4,50.0,50.0


Unnamed: 0_level_0,Value
Category,Unnamed: 1_level_1
A,10.0
B,20.0
C,40.0
,50.0


Unnamed: 0,Date,Date_parsed
0,2020-01-01,2020-01-01
1,2020-01-02,2020-01-02
2,2020-02-01,2020-02-01
3,,NaT
4,2020-02-03,2020-02-03


Date range: 2020-01-01 00:00:00 to 2020-02-03 00:00:00

Category value counts:
Category
A       2
B       1
C       1
None    1
Name: count, dtype: int64
