In [3]:
import pandas as pd
import numpy as np

In [5]:
# Create a Series
scores = pd.Series([85, 90, 78, 92], index=['Alice', 'Bob', 'Charlie', 'Diana'])
print(scores)


Alice      85
Bob        90
Charlie    78
Diana      92
dtype: int64


In [7]:
print(scores['Bob'])  # Access by index

90


In [44]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie', None],
    'Score': [85, None, 78, 92]
}
df = pd.DataFrame(data)
print(df)

      Name  Score
0    Alice   85.0
1      Bob    NaN
2  Charlie   78.0
3     None   92.0


In [46]:
# Check for missing values
print(df.isna())

    Name  Score
0  False  False
1  False   True
2  False  False
3   True  False


In [50]:
# Fill missing values
df['Score'] = df['Score'].fillna(df['Score'].mean())
df['Name'] = df['Name'].fillna('Unknown')
print(df)

      Name  Score
0    Alice   85.0
1      Bob   85.0
2  Charlie   78.0
3  Unknown   92.0


In [54]:
# Drop rows with missing values
df_dropped = df.dropna()
print(df)

      Name  Score
0    Alice   85.0
1      Bob   85.0
2  Charlie   78.0
3  Unknown   92.0


In [56]:
# Create two DataFrames
df1 = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Score': [85, 90, 78]
})
df2 = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Diana'],
    'Grade': ['A', 'B', 'A']
})
# Merge on 'Name'
merged_df = pd.merge(df1, df2, on='Name', how='outer')
print(merged_df)

      Name  Score Grade
0    Alice   85.0     A
1      Bob   90.0     B
2  Charlie   78.0   NaN
3    Diana    NaN     A


In [60]:
# Group by Age and calculate mean Score
grouped = df.groupby('Name')['Score'].agg(['mean', 'count'])
print(grouped)

         mean  count
Name                
Alice    85.0      1
Bob      85.0      1
Charlie  78.0      1
Unknown  92.0      1


In [9]:
# Create a DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'Age': [22, 25, 19, 30],
    'Score': [85, 90, 78, 92]
}
df = pd.DataFrame(data)
print(df)

      Name  Age  Score
0    Alice   22     85
1      Bob   25     90
2  Charlie   19     78
3    Diana   30     92


In [62]:
# Create a DataFrame with MultiIndex
arrays = [
    ['Math', 'Math', 'Science', 'Science'],
    ['Alice', 'Bob', 'Alice', 'Bob']
]
index = pd.MultiIndex.from_arrays(arrays, names=('Subject', 'Name'))
data = pd.DataFrame({'Score': [85, 90, 88, 92]}, index=index)
print(data)
# Access data
print(data.loc['Math', 'Alice'])

               Score
Subject Name        
Math    Alice     85
        Bob       90
Science Alice     88
        Bob       92
Score    85
Name: (Math, Alice), dtype: int64


In [66]:
# Pivot table
pivot_df = df.pivot_table(values='Score', index='Name', columns='Subject', aggfunc='mean')
print(pivot_df)


KeyError: 'Subject'

In [68]:
# Melt DataFrame
melted_df = pd.melt(df, id_vars=['Name'], value_vars=['Score'], var_name='Metric')
print(melted_df)

      Name Metric  value
0    Alice  Score   85.0
1      Bob  Score   85.0
2  Charlie  Score   78.0
3  Unknown  Score   92.0


In [70]:
# Create time series data
dates = pd.date_range('2025-01-01', periods=5, freq='D')
data = pd.DataFrame({'Value': [10, 20, 15, 25, 30]}, index=dates)
# Resample to weekly mean
weekly = data.resample('W').mean()
print(weekly)

            Value
2025-01-05   20.0


In [72]:
# Categorize scores
def categorize_score(score):
    if score >= 90:
        return 'Excellent'
    elif score >= 80:
        return 'Good'
    else:
        return 'Needs Improvement'
df['Category'] = df['Score'].apply(categorize_score)
print(df)

      Name  Score           Category
0    Alice   85.0               Good
1      Bob   85.0               Good
2  Charlie   78.0  Needs Improvement
3  Unknown   92.0          Excellent


In [33]:
# Load CSV
df = pd.read_csv('Students Dataset.csv')
print(df.head(1000))  # First 1000 rows


    Student_ID First_Name Last_Name                       Email  Gender  \
0        S3776      Ahmed  Williams  student2776@university.com  Female   
1        S2990      Maria     Brown  student1990@university.com  Female   
2        S1131      Ahmed   Johnson   student131@university.com  Female   
3        S5267      Maria     Brown  student4267@university.com    Male   
4        S2458       Emma   Johnson  student1458@university.com  Female   
..         ...        ...       ...                         ...     ...   
995      S5627       Emma     Davis  student4627@university.com  Female   
996      S2502       Emma     Jones  student1502@university.com  Female   
997      S1145       Omar   Johnson   student145@university.com    Male   
998      S1632      Ahmed   Johnson   student632@university.com  Female   
999      S1390       John     Smith   student390@university.com    Male   

           Age   Department  Attendance (%)  Midterm_Score  Final_Score  ...  \
0    22.045035     

In [40]:
# Create DataFrame with missing values
data = {
    'Name': ['Alice', 'Bob', 'Charlie', None],
    'Score': [85, None, 78, 92]
}
df = pd.DataFrame(data)
# Check for missing values
print(df.isna())
# Fill missing values
df['Score'] = df['Score'].fillna(df['Score'].mean())
df['Name'] = df['Name'].fillna('Unknown')
# Drop rows with missing values
df_dropped = df.dropna()

    Name  Score
0  False  False
1  False   True
2  False  False
3   True  False


In [15]:
print(df.info())  # Data types and missing values


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 23 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Student_ID                  1000 non-null   object 
 1   First_Name                  1000 non-null   object 
 2   Last_Name                   1000 non-null   object 
 3   Email                       1000 non-null   object 
 4   Gender                      1000 non-null   object 
 5   Age                         1000 non-null   float64
 6   Department                  1000 non-null   object 
 7   Attendance (%)              1000 non-null   float64
 8   Midterm_Score               1000 non-null   float64
 9   Final_Score                 1000 non-null   float64
 10  Assignments_Avg             1000 non-null   float64
 11  Quizzes_Avg                 1000 non-null   float64
 12  Participation_Score         1000 non-null   float64
 13  Projects_Score              1000 n

In [17]:
print(df.describe())  # Summary statistics

               Age  Attendance (%)  Midterm_Score  Final_Score  \
count  1000.000000     1000.000000    1000.000000  1000.000000   
mean     21.066634       68.061064      71.043648    69.296683   
std       2.311069       27.809824      17.350306    17.953780   
min      16.206210        0.000000      36.833650    36.745235   
25%      19.316787       59.161689      56.760185    54.255031   
50%      20.993244       73.283685      71.263025    69.131524   
75%      22.718634       87.093181      85.061684    83.092263   
max      26.380494      108.730516     108.914349   108.508363   

       Assignments_Avg  Quizzes_Avg  Participation_Score  Projects_Score  \
count      1000.000000  1000.000000          1000.000000     1000.000000   
mean         66.806472    75.394758             4.988787       75.023214   
std          26.825467    15.143672             2.882270       14.993117   
min           0.000000    46.120694             0.009666       46.580692   
25%          56.898954   

In [23]:
age = df['Age']
print(age)

0      22.045035
1      23.629150
2      22.506026
3      21.467511
4      22.919975
         ...    
995    23.814906
996    21.557628
997    21.631017
998    19.420526
999    24.328379
Name: Age, Length: 1000, dtype: float64


In [27]:
sorted_df = df.sort_values('Age', ascending=False)

In [29]:
print(sorted_df)

    Student_ID First_Name Last_Name                       Email  Gender  \
122      S4817       Liam     Jones  student3817@university.com    Male   
129      S2339       Emma     Jones  student1339@university.com    Male   
245      S1546      Ahmed     Smith   student546@university.com  Female   
616      S4577       Omar     Smith  student3577@university.com  Female   
519      S3119       Liam     Jones  student2119@university.com    Male   
..         ...        ...       ...                         ...     ...   
161      S3552        Ali     Smith  student2552@university.com    Male   
559      S1252      Maria     Jones   student252@university.com  Female   
557      S4434       Liam     Davis  student3434@university.com    Male   
807      S4498       Sara     Smith  student3498@university.com  Female   
818      S3172      Ahmed     Jones  student2172@university.com  Female   

           Age   Department  Attendance (%)  Midterm_Score  Final_Score  ...  \
122  26.380494     