## Pandas Options

In [1]:
import pandas as pd

In [2]:
# Set Pandas options for wrapping
pd.set_option('display.max_columns', None)   # Show all columns
pd.set_option('display.width', 100)          # Set line width before wrapping
pd.set_option('display.max_colwidth', None)  # Don't truncate text

In [3]:
sheet_id = '1jPk4sZyDn5NSIQ4iQK1P3aGh5ZAS2oaYexRoUyocAw0'
gid1 = '132995188' #pData1
gid2 = '330609257' #'sData1

In [4]:
url1 = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid1}"
print(url1)
url2 = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid2}"
print(url2)

https://docs.google.com/spreadsheets/d/1jPk4sZyDn5NSIQ4iQK1P3aGh5ZAS2oaYexRoUyocAw0/export?format=csv&gid=132995188
https://docs.google.com/spreadsheets/d/1jPk4sZyDn5NSIQ4iQK1P3aGh5ZAS2oaYexRoUyocAw0/export?format=csv&gid=330609257


In [5]:
pdata = pd.read_csv(url1)
sdata = pd.read_csv(url2)

In [6]:
merged = pd.merge(pdata, sdata, on='rollno' , how = 'inner')
merged.head()

Unnamed: 0,rollno,name,gender,dob,height,program,maths,english,stats,python
0,1,Jugurtha,Male,2000-08-16,156,BCA,,,63.0,40.0
1,2,Amandeep,Female,2000-02-21,170,BBA,83.0,47.0,63.0,50.0
2,3,Barkat,Female,2001-07-17,158,BCA,90.0,56.0,47.0,74.0
3,4,Suroor,Female,2001-05-16,188,BBA,74.0,72.0,98.0,
4,5,Sultana,Male,2001-04-02,167,BCA,81.0,78.0,,80.0


In [7]:
subvar = ['maths','english', 'stats','python']
idvar = ['rollno','name','gender','program']

In [8]:
# Mean marks by program
merged.groupby("program")[subvar].mean(numeric_only=True)

Unnamed: 0_level_0,maths,english,stats,python
program,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BBA,71.5,62.2,74.307692,74.428571
BCA,76.666667,68.5,64.166667,61.928571


In [9]:
# Max marks by Gender
merged.groupby("gender")[subvar].max(numeric_only=True)

Unnamed: 0_level_0,maths,english,stats,python
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,94.0,83.0,98.0,81.0
Male,99.0,96.0,98.0,94.0


In [10]:
# Multiple aggregations (mean, max, count) with named agg

In [11]:
(
    merged
    .groupby("program")
    .agg(
        mean_maths=("maths", "mean"),
        max_maths =("maths", "max"),
        count_rows =("rollno", "count")
    )
)

Unnamed: 0_level_0,mean_maths,max_maths,count_rows
program,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BBA,71.5,96.0,16
BCA,76.666667,99.0,14


In [12]:
# Aggregations across all subjects at once

In [13]:
(
    merged
    .groupby("program")[subvar]
    .agg(["mean", "median", "max", "min", "count"])
)

Unnamed: 0_level_0,maths,maths,maths,maths,maths,english,english,english,english,english,stats,stats,stats,stats,stats,python,python,python,python,python
Unnamed: 0_level_1,mean,median,max,min,count,mean,median,max,min,count,mean,median,max,min,count,mean,median,max,min,count
program,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
BBA,71.5,72.5,96.0,40.0,14,62.2,67.0,88.0,40.0,15,74.307692,72.0,98.0,50.0,13,74.428571,77.0,97.0,47.0,14
BCA,76.666667,78.5,99.0,53.0,12,68.5,67.5,96.0,46.0,12,64.166667,63.0,92.0,40.0,12,61.928571,62.5,83.0,40.0,14


In [14]:
# Group by Program + Gender

In [15]:
(
    merged
    .groupby(["program", "gender"])[subvar]
    .mean(numeric_only=True)
    .round(2)
    .reset_index()
)

Unnamed: 0,program,gender,maths,english,stats,python
0,BBA,Female,78.2,65.83,79.0,61.6
1,BBA,Male,68.88,61.29,72.0,81.57
2,BCA,Female,72.8,69.17,56.83,67.83
3,BCA,Male,79.43,67.83,71.5,57.5


In [None]:
#  Pivot-style summary (like Excel pivot)

In [24]:
pd.pivot_table(
    merged,
    index="program",
    values=subvar,
    aggfunc={"maths": ["mean","max"], "english": ["mean","max"], "stats": "mean", "python": "mean"}
).round(2).reset_index()

Unnamed: 0_level_0,program,english,english,maths,maths,python,stats
Unnamed: 0_level_1,Unnamed: 1_level_1,max,mean,max,mean,mean,mean
0,BBA,88.0,62.2,96.0,71.5,74.43,74.31
1,BCA,96.0,68.5,99.0,76.67,61.93,64.17


## Additional Columns
- Total
- Average

In [18]:
merged.assign (
    Total_Marks = merged[subvar].sum(axis=1, skipna=True),
    Avg_Marks   = merged[subvar].mean(axis=1, skipna=True)
)

Unnamed: 0,rollno,name,gender,dob,height,program,maths,english,stats,python,Total_Marks,Avg_Marks
0,1,Student_1,Male,2000-08-16,156,BCA,,,63.0,40.0,103.0,51.5
1,2,Student_2,,2000-02-21,170,BBA,83.0,47.0,63.0,50.0,243.0,60.75
2,3,Student_3,Female,2001-07-17,158,BCA,90.0,56.0,47.0,74.0,267.0,66.75
3,4,Student_4,Female,2001-05-16,188,BBA,74.0,72.0,98.0,,244.0,81.333333
4,5,Student_5,Male,2001-04-02,167,BCA,81.0,78.0,,80.0,239.0,79.666667
5,6,Student_6,Male,2000-10-12,153,BCA,67.0,46.0,48.0,47.0,208.0,52.0
6,7,Student_7,Male,2000-07-28,174,BBA,51.0,73.0,72.0,87.0,283.0,70.75
7,8,Student_8,Female,2000-06-27,163,BCA,94.0,62.0,63.0,76.0,295.0,73.75
8,9,Student_9,Female,2002-05-14,158,BBA,74.0,83.0,79.0,61.0,297.0,74.25
9,10,Student_10,Female,2000-03-06,175,BCA,66.0,74.0,40.0,74.0,254.0,63.5


## Handling missing marks
- By default, Pandas ignores NaN in mean, max, etc. If you want to require all marks present, drop rows first:

In [16]:
merged.dropna(subset=subvar).groupby("program")[subvar].mean()  
# here we removed rows have missing values in any of the subvar columns list

Unnamed: 0_level_0,maths,english,stats,python
program,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BBA,65.9,58.8,72.0,77.1
BCA,75.666667,66.444444,63.333333,66.555556


In [17]:
merged.groupby("program")[subvar].mean(numeric_only=True)

Unnamed: 0_level_0,maths,english,stats,python
program,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BBA,71.5,62.2,74.307692,74.428571
BCA,76.666667,68.5,64.166667,61.928571


In [None]:
#  Count of students with any missing subject by Program

In [19]:
(
    merged
    .assign(AnyMissing = merged[subvar].isna().any(axis=1))
    .groupby("program")["AnyMissing"]
    .sum()
    .rename("Students_with_Missing")
)

program
BBA    6
BCA    5
Name: Students_with_Missing, dtype: int64

## Tricks
- Top Students

In [22]:
def top_scorer(df, col):
    idx = merged[col].idxmax(skipna=True)
    return merged.loc[idx, ["rollno", "name", col]]

tops = {s: top_scorer(merged, s) for s in subvar}
pd.DataFrame(tops)  # dict of small DataFrames

Unnamed: 0,maths,english,stats,python
english,,96.0,,
maths,99.0,,,
name,Student_15,Student_22,Student_4,Student_24
python,,,,97.0
rollno,15,22,4,24
stats,,,98.0,


## End here