In [30]:
import pandas as pd

In [31]:
# 1. Read CSV file into DataFrame
# The path is absolute (presumably the Colab working directory); the CSV must
# already exist there before this cell runs, otherwise read_csv raises.
# NOTE(review): the recorded df.tail() output ends with a row whose Year is 19
# and whose Season/City are NaN, and the summary statistics report a minimum
# Year of 19. That looks like a truncated copy of the CSV -- confirm the file
# is complete before trusting any downstream result.
file_path = "/content/athlete_events.csv"
df = pd.read_csv(file_path)

In [32]:
# 2. Inspect the DataFrame
# Preview the top of the table; head() with no argument returns 5 rows.
# A single print with a newline separator emits the banner and the frame on
# consecutive lines.
print("----- First 5 Rows -----", df.head(), sep="\n")

----- First 5 Rows -----
   ID                      Name Sex   Age  Height  Weight            Team  \
0   1                 A Dijiang   M  24.0   180.0    80.0           China   
1   2                  A Lamusi   M  23.0   170.0    60.0           China   
2   3       Gunnar Nielsen Aaby   M  24.0     NaN     NaN         Denmark   
3   4      Edgar Lindenau Aabye   M  34.0     NaN     NaN  Denmark/Sweden   
4   5  Christine Jacoba Aaftink   F  21.0   185.0    82.0     Netherlands   

   NOC        Games  Year  Season       City          Sport  \
0  CHN  1992 Summer  1992  Summer  Barcelona     Basketball   
1  CHN  2012 Summer  2012  Summer     London           Judo   
2  DEN  1920 Summer  1920  Summer  Antwerpen       Football   
3  DEN  1900 Summer  1900  Summer      Paris     Tug-Of-War   
4  NED  1988 Winter  1988  Winter    Calgary  Speed Skating   

                              Event Medal  
0       Basketball Men's Basketball   NaN  
1      Judo Men's Extra-Lightweight   NaN  
2

In [33]:
# Preview the bottom of the table; tail() with no argument returns 5 rows.
# Useful for spotting a malformed final record left by an incomplete file.
print("----- Last 5 Rows -----", df.tail(), sep="\n")

----- Last 5 Rows -----
            ID                                 Name Sex   Age  Height  Weight  \
239973  120250                           Peter Tich   M  31.0   178.0    68.0   
239974  120251                            Milan Tii   M  28.0   197.0    95.0   
239975  120252                 Edward James Tickell   M  51.0     NaN     NaN   
239976  120253                  Antti Sakari Tickln   M  29.0   175.0    70.0   
239977  120254  Charles Frederick "Charlie" Tickner   M  26.0     NaN     NaN   

                 Team  NOC        Games  Year  Season       City  \
239973       Slovakia  SVK  2000 Summer  2000  Summer     Sydney   
239974     Montenegro  MNE  2008 Summer  2008  Summer    Beijing   
239975  Great Britain  GBR  1912 Summer  1912  Summer  Stockholm   
239976        Finland  FIN  1988 Winter  1988  Winter    Calgary   
239977  United States  USA  1980 Winter    19     NaN        NaN   

                       Sport                                     Event Medal  
2

In [34]:
# Show the dtype pandas inferred for every column (one line per column).
print("----- Data Types -----", df.dtypes, sep="\n")

----- Data Types -----
ID          int64
Name       object
Sex        object
Age       float64
Height    float64
Weight    float64
Team       object
NOC        object
Games      object
Year        int64
Season     object
City       object
Sport      object
Event      object
Medal      object
dtype: object


In [35]:
# Report the table dimensions as a (row_count, column_count) tuple.
print("----- Shape of Dataset (Rows, Columns) -----", df.shape, sep="\n")

----- Shape of Dataset (Rows, Columns) -----
(239978, 15)


In [36]:
# 3. Compute Summary Statistics
# Keep only the numeric columns, then aggregate each one with the five
# requested statistics. "count" is the number of non-null values, so it is
# lower than the row count for columns that contain NaN.
# NOTE(review): ID is numeric and is therefore summarised as well, although
# its mean/median carry no meaning -- exclude it if that is unwanted.
summary_stats = (
    df.select_dtypes(include='number')
      .agg(['mean', 'median', 'min', 'max', 'count'])
)

print(
    "----- Summary Statistics (Mean, Median, Min, Max, Count) -----",
    summary_stats,
    sep="\n",
)


----- Summary Statistics (Mean, Median, Min, Max, Count) -----
                   ID            Age         Height         Weight  \
mean     60505.485482      25.563961     175.363899      70.735999   
median   60746.000000      24.000000     175.000000      70.000000   
min          1.000000      10.000000     127.000000      25.000000   
max     120254.000000      96.000000     223.000000     214.000000   
count   239978.000000  231539.000000  186579.000000  184098.000000   

                 Year  
mean      1978.265679  
median    1988.000000  
min         19.000000  
max       2016.000000  
count   239978.000000  


In [37]:
# 4. Filter Rows
# Boolean-mask selection: keep rows whose Age is strictly greater than 30.
# Rows with a missing Age compare as False and are dropped from the result.
# The recorded preview shows the same athlete ID on several rows, so the
# result can list one athlete more than once.
filtered_age = df.loc[df['Age'].gt(30)]
print("----- Athletes Older Than 30 -----", filtered_age.head(), sep="\n")

----- Athletes Older Than 30 -----
    ID                  Name Sex   Age  Height  Weight            Team  NOC  \
3    4  Edgar Lindenau Aabye   M  34.0     NaN     NaN  Denmark/Sweden  DEN   
10   6       Per Knut Aaland   M  31.0   188.0    75.0   United States  USA   
11   6       Per Knut Aaland   M  31.0   188.0    75.0   United States  USA   
12   6       Per Knut Aaland   M  31.0   188.0    75.0   United States  USA   
13   6       Per Knut Aaland   M  31.0   188.0    75.0   United States  USA   

          Games  Year  Season         City                 Sport  \
3   1900 Summer  1900  Summer        Paris            Tug-Of-War   
10  1992 Winter  1992  Winter  Albertville  Cross Country Skiing   
11  1992 Winter  1992  Winter  Albertville  Cross Country Skiing   
12  1992 Winter  1992  Winter  Albertville  Cross Country Skiing   
13  1992 Winter  1992  Winter  Albertville  Cross Country Skiing   

                                                Event Medal  
3                  

In [38]:
# 5. Select Specific Columns
# Project the frame onto five columns, keeping every row and the listed
# column order.
selected_columns = df.loc[:, ['Name', 'Sex', 'Age', 'Sport', 'Medal']]
print("----- Selected Columns -----", selected_columns.head(), sep="\n")

----- Selected Columns -----
                       Name Sex   Age          Sport Medal
0                 A Dijiang   M  24.0     Basketball   NaN
1                  A Lamusi   M  23.0           Judo   NaN
2       Gunnar Nielsen Aaby   M  24.0       Football   NaN
3      Edgar Lindenau Aabye   M  34.0     Tug-Of-War  Gold
4  Christine Jacoba Aaftink   F  21.0  Speed Skating   NaN


In [39]:
# 6. Slice Subsets (First 100 rows)
# head(100) takes the first 100 rows by position, like the slice iloc[:100].
# Printing the whole subset relies on pandas' display truncation to keep the
# output short (the recorded output elides the middle rows with "..").
subset = df.head(100)
print("----- First 100 Rows -----", subset, sep="\n")

----- First 100 Rows -----
    ID                                Name Sex   Age  Height  Weight  \
0    1                           A Dijiang   M  24.0   180.0    80.0   
1    2                            A Lamusi   M  23.0   170.0    60.0   
2    3                 Gunnar Nielsen Aaby   M  24.0     NaN     NaN   
3    4                Edgar Lindenau Aabye   M  34.0     NaN     NaN   
4    5            Christine Jacoba Aaftink   F  21.0   185.0    82.0   
..  ..                                 ...  ..   ...     ...     ...   
95  32                Olav Augunson Aarnes   M  23.0     NaN     NaN   
96  33                 Mika Lauri Aarnikka   M  24.0   187.0    76.0   
97  33                 Mika Lauri Aarnikka   M  28.0   187.0    76.0   
98  34  Jamale (Djamel-) Aarrass (Ahrass-)   M  30.0   187.0    76.0   
99  35              Dagfinn Sverre Aarskog   M  24.0   190.0    98.0   

              Team  NOC        Games  Year  Season       City          Sport  \
0            China  CHN  199

In [40]:
# 8. Save Filtered Results to CSV and Excel
# Both files are written relative to the current working directory.
# index=False keeps the pandas row index out of the exported files.
filtered_age.to_csv("athletes_above_30.csv", index=False)
# The step promises an Excel export as well, but only the CSV was written.
# Writing .xlsx requires an Excel engine (e.g. openpyxl) in the kernel
# environment; pandas raises ImportError if none is installed.
filtered_age.to_excel("athletes_above_30.xlsx", index=False)

print("Filtered files saved successfully!")

Filtered files saved successfully!
