In [28]:
import pandas as pd
import numpy as np

# Read the student records from disk
df = pd.read_csv('student_data.csv')

# Per-subject mean and standard deviation of Score. Named aggregation gives
# the output columns their final names directly, and as_index=False keeps
# Subject as an ordinary column instead of the index.
summary_stats = df.groupby('Subject', as_index=False).agg(
    Mean_Score=('Score', 'mean'),
    Std_Deviation=('Score', 'std'),
)

print(summary_stats)


   Subject  Mean_Score  Std_Deviation
0  Science       88.25       7.410578
1    maths       85.00       4.082483


In [16]:

# Read the student records from disk
df = pd.read_csv('student_data.csv')

# For every subject, idxmax gives the row label of the highest Score;
# those labels then pull the complete rows out of the original frame.
best_row_labels = df.groupby('Subject')['Score'].idxmax()
top_students = df.loc[best_row_labels].reset_index(drop=True)

print(top_students)


  Name  Age  Score  Subject
0  Bob   25     95  Science
1  Bob   25     90    maths


In [22]:

# Read the student records from disk
df = pd.read_csv('student_data.csv')

# Seed the global NumPy generator so the synthetic column is repeatable,
# then draw one integer per row from 80..100 (the upper bound 101 is exclusive).
np.random.seed(0)
df['Attendance'] = np.random.randint(low=80, high=101, size=df.shape[0])

# Index the rows by (Subject, Name) and keep only the two numeric columns
multiindex_df = df.set_index(['Subject', 'Name']).loc[:, ['Score', 'Attendance']]

print(multiindex_df)


                 Score  Attendance
Subject Name                      
maths   Alice       85          92
        Bob         90          95
Science Charlie     78          80
        Diana       92          83
        Alice       88          83
        Bob         95          87
maths   Charlie     80          89
        Diana       85          99


In [32]:

# Read the student records from disk
df = pd.read_csv('student_data.csv')

# Mean Score per student; Name stays a regular column (as_index=False) and
# the aggregated column is renamed to say what it now holds.
average_scores = (
    df.groupby('Name', as_index=False)['Score']
    .mean()
    .rename(columns={'Score': 'Average_Score'})
)

print(average_scores)


      Name  Average_Score
0    Alice           86.5
1      Bob           92.5
2  Charlie           79.0
3    Diana           88.5


In [34]:

# Read the student records from disk
df = pd.read_csv('student_data.csv')

# Wide view of the scores: one row per student, one column per subject.
# No aggfunc is passed, so pandas falls back to its default (the mean).
pivot_table = pd.pivot_table(
    df,
    values='Score',
    index='Name',
    columns='Subject',
)

print(pivot_table)


Subject  Science  maths
Name                   
Alice       88.0   85.0
Bob         95.0   90.0
Charlie     78.0   80.0
Diana       92.0   85.0


In [36]:
# Turn the wide pivot table back into long form.
# reset_index first, so that Name becomes a column melt can use as the identifier.
wide_scores = pivot_table.reset_index()
melted_df = wide_scores.melt(
    id_vars='Name',
    var_name='Subject',
    value_name='Score',
)

print(melted_df)


      Name  Subject  Score
0    Alice  Science   88.0
1      Bob  Science   95.0
2  Charlie  Science   78.0
3    Diana  Science   92.0
4    Alice    maths   85.0
5      Bob    maths   90.0
6  Charlie    maths   80.0
7    Diana    maths   85.0


In [38]:

# Seed the global NumPy generator for repeatable output, then draw 30 daily
# sales figures from 100..500 (the upper bound 501 is exclusive).
np.random.seed(0)
sales = np.random.randint(low=100, high=501, size=30)

# 30 consecutive calendar days beginning 2025-01-01
dates = pd.date_range('2025-01-01', periods=30, freq='D')

# Assemble the daily sales table: one row per day
sales_df = pd.DataFrame(dict(Date=dates, Sales=sales))

print(sales_df)


         Date  Sales
0  2025-01-01    272
1  2025-01-02    147
2  2025-01-03    217
3  2025-01-04    292
4  2025-01-05    423
5  2025-01-06    351
6  2025-01-07    295
7  2025-01-08    459
8  2025-01-09    109
9  2025-01-10    311
10 2025-01-11    377
11 2025-01-12    342
12 2025-01-13    392
13 2025-01-14    187
14 2025-01-15    170
15 2025-01-16    188
16 2025-01-17    496
17 2025-01-18    414
18 2025-01-19    293
19 2025-01-20    139
20 2025-01-21    187
21 2025-01-22    274
22 2025-01-23    188
23 2025-01-24    437
24 2025-01-25    265
25 2025-01-26    125
26 2025-01-27    433
27 2025-01-28    172
28 2025-01-29    365
29 2025-01-30    215


In [40]:
# Index the daily sales by Date so they can be resampled by calendar period.
# The guard keeps this cell safe to re-run: once 'Date' has become the index
# it is no longer a column, and an unconditional set_index('Date') would
# raise KeyError on the second execution.
if 'Date' in sales_df.columns:
    sales_df = sales_df.set_index('Date')

# Resample to weekly frequency, summing Sales. Only the Sales column is
# selected so that derived columns added to sales_df later are not summed
# into the weekly totals when this cell is executed again.
weekly_sales = sales_df[['Sales']].resample('W').sum()

print(weekly_sales)


            Sales
Date             
2025-01-05   1351
2025-01-12   2244
2025-01-19   2140
2025-01-26   1615
2025-02-02   1185


In [42]:
# Relies on sales_df already being indexed by Date (one row per day).
# Each value is the mean of the current row and the six rows before it;
# rows without a full 7-row window are left as NaN.
daily_sales = sales_df['Sales']
sales_df['7d_Rolling_Mean'] = daily_sales.rolling(7, min_periods=7).mean()

print(sales_df)


            Sales  7d_Rolling_Mean
Date                              
2025-01-01    272              NaN
2025-01-02    147              NaN
2025-01-03    217              NaN
2025-01-04    292              NaN
2025-01-05    423              NaN
2025-01-06    351              NaN
2025-01-07    295       285.285714
2025-01-08    459       312.000000
2025-01-09    109       306.571429
2025-01-10    311       320.000000
2025-01-11    377       332.142857
2025-01-12    342       320.571429
2025-01-13    392       326.428571
2025-01-14    187       311.000000
2025-01-15    170       269.714286
2025-01-16    188       281.000000
2025-01-17    496       307.428571
2025-01-18    414       312.714286
2025-01-19    293       305.714286
2025-01-20    139       269.571429
2025-01-21    187       269.571429
2025-01-22    274       284.428571
2025-01-23    188       284.428571
2025-01-24    437       276.000000
2025-01-25    265       254.714286
2025-01-26    125       230.714286
2025-01-27    433   