<a href="https://colab.research.google.com/github/ahmeda335/Pandas/blob/main/Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing pandas

In [240]:
import pandas as pd
import numpy as np

# Creating a DataFrame.

In [241]:
df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [242]:
df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns = ["A", "B", "C"])
df.head()

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


# Information and description of the DataFrame.

In [243]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       3 non-null      int64
 1   B       3 non-null      int64
 2   C       3 non-null      int64
dtypes: int64(3)
memory usage: 200.0 bytes


In [244]:
df.describe()

Unnamed: 0,A,B,C
count,3.0,3.0,3.0
mean,4.0,5.0,6.0
std,3.0,3.0,3.0
min,1.0,2.0,3.0
25%,2.5,3.5,4.5
50%,4.0,5.0,6.0
75%,5.5,6.5,7.5
max,7.0,8.0,9.0


In [245]:
df.shape

(3, 3)

In [246]:
df.nunique()  # Getting number of elements for each row.

Unnamed: 0,0
A,3
B,3
C,3


In [247]:
df['B'].unique()  # Getting the elements of the column specified.

array([2, 5, 8])

# Playing with indexes.

In [248]:
df.index

RangeIndex(start=0, stop=3, step=1)

In [249]:
df.index.tolist()  # showing the normal indexs in a list.

[0, 1, 2]

In [250]:
df_indexed = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns = ["A", "B", "C"], index = ["a", "b", "c"])
df_indexed

Unnamed: 0,A,B,C
a,1,2,3
b,4,5,6
c,7,8,9


In [251]:
df_indexed = df_indexed.set_index("A")  # Setting the column 'A' as the index.

In [252]:
df_indexed

Unnamed: 0_level_0,B,C
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2,3
4,5,6
7,8,9


In [253]:
df_indexed.index

Index([1, 4, 7], dtype='int64', name='A')

In [254]:
df_indexed.reset_index()  # Resetting the index.

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


# Displaying the data.

In [255]:
df.head()  # showing the first '5' rows.

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [256]:
df.head(2)  # showing the first '2' rows.

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6


In [257]:
df.tail()  # showing the last '5' rows.

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [258]:
df.tail(2)  # showing the last '2' rows.

Unnamed: 0,A,B,C
1,4,5,6
2,7,8,9


In [259]:
display(df.sample(2))  # Getting '2' random data.
display(df.sample(2, random_state=200))  # This will give me the same random data unless I change number '200'.

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6


Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6


# Accessing the Data

In [260]:
display(df.loc[[1, 2], ["B", "C"]])  # It is in the shape 'loc[columns, rows]'
display(df.loc[0:2, ['A', 'B']])

display(df.iloc[[1, 2], [0, 1]])  # It is in the shape 'iloc[rows, columns]' but in indexes.
display(df.iloc[0:2, [0, 1]])

Unnamed: 0,B,C
1,5,6
2,8,9


Unnamed: 0,A,B
0,1,2
1,4,5
2,7,8


Unnamed: 0,A,B
1,4,5
2,7,8


Unnamed: 0,A,B
0,1,2
1,4,5


In [261]:
# Changing the value of a cell
df.loc[1, 'A'] = 44
df

Unnamed: 0,A,B,C
0,1,2,3
1,44,5,6
2,7,8,9


In [262]:
df.at[0, "A"]
df.iat[1, 1]

5

In [263]:
# Grapping Columns
display(df['A'])
display(df.A)
display(type(df['A']))

Unnamed: 0,A
0,1
1,44
2,7


Unnamed: 0,A
0,1
1,44
2,7


In [264]:
# Sorting the rows
df.sort_values('A', ascending=False)

Unnamed: 0,A,B,C
1,44,5,6
2,7,8,9
0,1,2,3


In [265]:
# Iterating over the rows using for loop.
for index, row in df.iterrows():
  print(index)
  print(row)
  print('\n\n\n')

0
A    1
B    2
C    3
Name: 0, dtype: int64




1
A    44
B     5
C     6
Name: 1, dtype: int64




2
A    7
B    8
C    9
Name: 2, dtype: int64






# Filtering the Data.

In [266]:
athlete = pd.read_csv('https://raw.githubusercontent.com/KeithGalli/complete-pandas-tutorial/refs/heads/master/data/bios.csv').set_index('athlete_id')
athlete.head(10)

Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25
6,Nicolas Chatelain,1970-01-13,Amiens,Somme,FRA,France,181.0,70.0,
7,Patrick Chila,1969-11-27,Ris-Orangis,Essonne,FRA,France,180.0,73.0,
8,Henri Cochet,1901-12-14,Villeurbanne,Rhône,FRA,France,,,1987-04-02
9,Marcel Cousin,1896-08-04,Nîmes,Gard,FRA,France,,,1986-08-01
10,Guy de la Chapelle,1868-07-16,Farges-Allichamps,Cher,FRA,France,,,1923-08-27


In [267]:
display(athlete.loc[athlete['height_cm'] > 180, ['name', 'height_cm']].head())  # If I want to specify columns, I use 'loc'.
display(athlete[athlete['height_cm'] > 180].head())   # Here, I didn't use 'loc' but I can't specify columns.

Unnamed: 0_level_0,name,height_cm
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2,Arnaud Boetsch,183.0
3,Jean Borotra,183.0
6,Nicolas Chatelain,181.0
17,Guy Forget,189.0
26,Henri Leconte,184.0


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
6,Nicolas Chatelain,1970-01-13,Amiens,Somme,FRA,France,181.0,70.0,
17,Guy Forget,1965-01-04,Casablanca,Casablanca-Settat,MAR,France,189.0,79.0,
26,Henri Leconte,1963-07-04,Lillers,Pas-de-Calais,FRA,France,184.0,78.0,


In [268]:
athlete.loc[athlete['height_cm'] > 180].info()

<class 'pandas.core.frame.DataFrame'>
Index: 35368 entries, 2 to 149221
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          35368 non-null  object 
 1   born_date     35359 non-null  object 
 2   born_city     30082 non-null  object 
 3   born_region   30082 non-null  object 
 4   born_country  30082 non-null  object 
 5   NOC           35368 non-null  object 
 6   height_cm     35368 non-null  float64
 7   weight_kg     33882 non-null  float64
 8   died_date     4583 non-null   object 
dtypes: float64(2), object(7)
memory usage: 2.7+ MB


In [269]:
# Using Two conditions

athlete.loc[(athlete['height_cm'] > 180) & (athlete['weight_kg'] > 90), ['name', 'height_cm', 'weight_kg']].head()

Unnamed: 0_level_0,name,height_cm,weight_kg
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
519,Detlef Kahlert,184.0,92.0
526,Bernhard Schulkowski,195.0,105.0
634,Mark Philippoussis,193.0,91.0
651,Karl Jindrak,185.0,98.0
797,Árni Þór Hallgrímson,192.0,97.0


In [270]:
# Displaying the string values which contain specific characters. Using (Regex)

display(athlete.loc[athlete.name.str.contains("Ahmed")].sample(5, random_state=200))
display(athlete.loc[athlete.name.str.contains("AHMED", case=False)].sample(5, random_state=200))  # When I put 'case=False' It is not case sensetive now. 'AHMED' like 'ahmed'

# Getting cities that starts with vowels
display(athlete.loc[athlete.born_city.str.contains(r'^[AEIOUaeiou]', na=False)].sample(5))

# Find athletes with names ending with 'son' or 'sen'
display(athlete.loc[athlete.name.str.contains(r'son$|sen$', case=False, na=False)].sample(5))

# Find athletes with names has repeated letters like 'ahmmad'.
display(athlete.loc[athlete.name.str.contains(r'(.)\1', na=False)].sample(5))

# If I want to stop the (Regex) search.
display(athlete.loc[athlete.name.str.contains('AHMED', case=False, regex=False)].sample(5))

Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
19972,Bashir Ahmed,1934-12-23,Karachi,Sindh,PAK,Pakistan,171.0,67.0,
124613,Ahmed Awad,1987-01-01,,,,Egypt,165.0,66.0,
129923,Samia Ahmed,1996-01-20,Al-Qahira (Cairo),Al-Qahira,EGY,Egypt,170.0,57.0,
66574,Ahmed Douhou,1976-12-14,Bouaké,Vallée du Bandama,CIV,Côte d'Ivoire France,190.0,94.0,
31583,Ahmed Debes,1970-01-14,,,,Egypt,,,


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
19972,Bashir Ahmed,1934-12-23,Karachi,Sindh,PAK,Pakistan,171.0,67.0,
124613,Ahmed Awad,1987-01-01,,,,Egypt,165.0,66.0,
129923,Samia Ahmed,1996-01-20,Al-Qahira (Cairo),Al-Qahira,EGY,Egypt,170.0,57.0,
66574,Ahmed Douhou,1976-12-14,Bouaké,Vallée du Bandama,CIV,Côte d'Ivoire France,190.0,94.0,
31583,Ahmed Debes,1970-01-14,,,,Egypt,,,


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
70771,Andreas Kouvelogiannis,1939-03-07,Athina (Athens),Attiki,GRE,Greece,178.0,80.0,
10628,Mihai Zafiu,1949-06-09,Albești,Botoșani,ROU,Romania,187.0,87.0,
114912,Anastasiya Pilipenko,1986-09-13,Almaty,Almaty,KAZ,Kazakhstan,174.0,55.0,
68697,Ann Johnson,1933-09-28,Elham,England,GBR,Great Britain,175.0,64.0,
88586,Adolf Koxeder,1934-10-09,Innsbruck,Tirol,AUT,Austria,180.0,85.0,


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
87572,Arne Johansson,1915-02-25,Enköping,Uppsala,SWE,Sweden,,,1956-10-12
11983,Harry Freeman-Jackson,1910-12-23,Rawalpindi,Punjab,PAK,Ireland,175.0,63.0,1993-07-21
112001,Therese Helgesson,1983-07-22,Stockholm,Stockholm,SWE,Sweden,172.0,64.0,
66938,Julius Jørgensen,1880-06-20,København (Copenhagen),Hovedstaden,DEN,Denmark,,,1937-10-03
52061,Jorge Edson,1966-10-13,Porto Alegre,Rio Grande do Sul,BRA,Brazil,192.0,90.0,


  display(athlete.loc[athlete.name.str.contains(r'(.)\1', na=False)].sample(5))


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
90885,Tracey Wheeler,1967-09-26,Sydney,New South Wales,AUS,Australia,180.0,74.0,
25263,Bill Neil,1939-05-22,Airdrie,Scotland,GBR,Great Britain,183.0,73.0,2014-09-22
52397,Gabriella Csapó-Fekete,1954-08-23,Nyíregyháza,Szabolcs-Szatmár-Bereg,HUN,Hungary,176.0,61.0,2023-06-19
126388,Elise Chabbey,1993-04-24,Genève (Geneva),Genève,SUI,Switzerland,164.0,56.0,
31680,Philippe Gardent,1964-03-15,Belleville,Rhône,FRA,France,184.0,98.0,


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
66574,Ahmed Douhou,1976-12-14,Bouaké,Vallée du Bandama,CIV,Côte d'Ivoire France,190.0,94.0,
29733,Ahmed Amin Tabouzada,,,,,Egypt,,,
13322,Jamal Ahmed Al-Doseri,1970-01-01,,,,Bahrain,155.0,70.0,
52736,Ahmed Zoubi,,,,,Libya,,,
52729,Ahmed El-Faghei,,,,,Libya,,,


In [271]:
# using 'isin' function.

display(athlete.loc[athlete.born_country.isin(['EGY'])].sample(5))

display(athlete.loc[athlete.born_country.isin(["EGy"]) & athlete.name.str.contains(r"m")])

Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
53705,Ibrahim Abdel Rahman,1940-01-01,Al-Qahira (Cairo),Al-Qahira,EGY,United Arab Republic,,,
67088,Ibrahim Okasha,1911-01-01,Al-Qahira (Cairo),Al-Qahira,EGY,Egypt,,,
24793,Mohamed Reda Hamad,1939-11-09,Al-Qahira (Cairo),Al-Qahira,EGY,Egypt,,,
1848,Kabary Salem,1968-02-12,Al-Iskanderiya (Alexandria),Al-Iskanderiya,EGY,Egypt,,,
67067,Mahmoud Attef,1938-01-01,Al-Qahira (Cairo),Al-Qahira,EGY,United Arab Republic,,,


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


# Applying on an existance file. 'Coffee Shop.csv'

In [272]:
coffee = pd.read_csv('https://raw.githubusercontent.com/KeithGalli/complete-pandas-tutorial/refs/heads/master/warmup-data/coffee.csv')

In [273]:
coffee
display(coffee)

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35


In [274]:
coffee.head(1)

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25


In [275]:
coffee.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Day          14 non-null     object
 1   Coffee Type  14 non-null     object
 2   Units Sold   14 non-null     int64 
dtypes: int64(1), object(2)
memory usage: 464.0+ bytes


In [276]:
coffee.describe()

Unnamed: 0,Units Sold
count,14.0
mean,32.857143
std,9.346798
min,15.0
25%,26.25
50%,35.0
75%,38.75
max,45.0


In [277]:
coffee.columns

Index(['Day', 'Coffee Type', 'Units Sold'], dtype='object')

In [278]:
coffee.sort_values('Units Sold')

Unnamed: 0,Day,Coffee Type,Units Sold
1,Monday,Latte,15
3,Tuesday,Latte,20
0,Monday,Espresso,25
5,Wednesday,Latte,25
2,Tuesday,Espresso,30
7,Thursday,Latte,30
4,Wednesday,Espresso,35
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35


In [279]:
coffee.sort_values('Units Sold', ascending=False)

Unnamed: 0,Day,Coffee Type,Units Sold
8,Friday,Espresso,45
10,Saturday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
4,Wednesday,Espresso,35
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
2,Tuesday,Espresso,30
7,Thursday,Latte,30


In [280]:
display(coffee.sort_values(['Units Sold', 'Coffee Type'], ascending=[0, 1]))
display(coffee.sort_values(['Units Sold', 'Coffee Type'], ascending=[1, 0]))

Unnamed: 0,Day,Coffee Type,Units Sold
8,Friday,Espresso,45
10,Saturday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
4,Wednesday,Espresso,35
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
2,Tuesday,Espresso,30
7,Thursday,Latte,30


Unnamed: 0,Day,Coffee Type,Units Sold
1,Monday,Latte,15
3,Tuesday,Latte,20
5,Wednesday,Latte,25
0,Monday,Espresso,25
7,Thursday,Latte,30
2,Tuesday,Espresso,30
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
4,Wednesday,Espresso,35


In [281]:
for index, row in coffee.iterrows():
  print(index)
  print(row)
  print('\n\n\n')

0
Day              Monday
Coffee Type    Espresso
Units Sold           25
Name: 0, dtype: object




1
Day            Monday
Coffee Type     Latte
Units Sold         15
Name: 1, dtype: object




2
Day             Tuesday
Coffee Type    Espresso
Units Sold           30
Name: 2, dtype: object




3
Day            Tuesday
Coffee Type      Latte
Units Sold          20
Name: 3, dtype: object




4
Day            Wednesday
Coffee Type     Espresso
Units Sold            35
Name: 4, dtype: object




5
Day            Wednesday
Coffee Type        Latte
Units Sold            25
Name: 5, dtype: object




6
Day            Thursday
Coffee Type    Espresso
Units Sold           40
Name: 6, dtype: object




7
Day            Thursday
Coffee Type       Latte
Units Sold           30
Name: 7, dtype: object




8
Day              Friday
Coffee Type    Espresso
Units Sold           45
Name: 8, dtype: object




9
Day            Friday
Coffee Type     Latte
Units Sold         35
Name: 9, dtype: object





In [282]:
coffee.nunique()  # Getting number of unique elements for each column.

Unnamed: 0,0
Day,7
Coffee Type,2
Units Sold,7


In [283]:
coffee['Coffee Type'].unique()

array(['Espresso', 'Latte'], dtype=object)

In [284]:
coffee_excel = coffee.to_excel('coffee.xlsx')  # Converting the file to excel file.

# Applying on an existance file. 'Athletics.csv'

In [285]:
athlete = pd.read_csv('https://raw.githubusercontent.com/KeithGalli/complete-pandas-tutorial/refs/heads/master/data/bios.csv').set_index('athlete_id')
athlete.head()

Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25


In [286]:
athlete.tail()

Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
149222,Polina Luchnikova,2002-01-30,Serov,Sverdlovsk,RUS,ROC,167.0,61.0,
149223,Valeriya Merkusheva,1999-09-20,Moskva (Moscow),Moskva,RUS,ROC,168.0,65.0,
149224,Yuliya Smirnova,1998-05-08,Kotlas,Arkhangelsk,RUS,ROC,163.0,55.0,
149225,André Foussard,1899-05-19,Niort,Deux-Sèvres,FRA,France,166.0,,1986-03-18
149814,Bill Phillips,1913-07-15,Dulwich Hill,New South Wales,AUS,Australia,,,2003-10-20


In [287]:
display(athlete.sample(5))  # Gettnig a random '10' samples.
display(athlete.sample(5, random_state=122))  # This will print the same rows unless I change the number '122'.

Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
23637,Juan Paladino,1925-02-17,Montevideo,Montevideo,URU,Uruguay,185.0,95.0,
89064,Ruslan Hlivinskyi,1975-02-07,,,,Ukraine,204.0,82.0,
112780,Alexis Chiclana,1987-02-02,,,,Puerto Rico,180.0,86.0,
113072,Sara Mustonen,1981-02-08,Höganäs,Skåne,SWE,Sweden,161.0,58.0,
5892,Yaya Cissokho,1955-01-24,,,,Senegal,196.0,87.0,


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
58157,Eddie Bowey,1928-01-05,London,England,GBR,Great Britain,,,2016-01-01
65816,Jane Bell,1910-06-02,Toronto,Ontario,CAN,Canada,169.0,58.0,1998-07-01
26145,Beniamino Vignola,1959-06-12,Verona,Verona,ITA,Italy,172.0,64.0,
66053,Lionel Fournier,1917-03-19,Pincher Creek,Alberta,CAN,Canada,,,1993-09-03
97498,Hiroyasu Shimizu,1974-02-27,Obihiro,Hokkaido,JPN,Japan,162.0,70.0,


In [288]:
display(athlete.loc[[1, 2, 3]])
display(athlete.loc[1:6])
display(athlete.loc[[1, 2, 3], ['name']])
display(athlete.loc[[1, 2, 3], ['name', 'born_city']])

display(athlete.iloc[[0, 1, 2], [0, 1, 2]])
display(athlete.iloc[1:3, 0:3])

Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17


Unnamed: 0_level_0,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25
6,Nicolas Chatelain,1970-01-13,Amiens,Somme,FRA,France,181.0,70.0,


Unnamed: 0_level_0,name
athlete_id,Unnamed: 1_level_1
1,Jean-François Blanchy
2,Arnaud Boetsch
3,Jean Borotra


Unnamed: 0_level_0,name,born_city
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Jean-François Blanchy,Bordeaux
2,Arnaud Boetsch,Meulan
3,Jean Borotra,Biarritz


Unnamed: 0_level_0,name,born_date,born_city
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Jean-François Blanchy,1886-12-12,Bordeaux
2,Arnaud Boetsch,1969-04-01,Meulan
3,Jean Borotra,1898-08-13,Biarritz


Unnamed: 0_level_0,name,born_date,born_city
athlete_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,Arnaud Boetsch,1969-04-01,Meulan
3,Jean Borotra,1898-08-13,Biarritz


In [289]:
athlete.info()

<class 'pandas.core.frame.DataFrame'>
Index: 145500 entries, 1 to 149814
Data columns (total 9 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   name          145500 non-null  object 
 1   born_date     143693 non-null  object 
 2   born_city     110908 non-null  object 
 3   born_region   110908 non-null  object 
 4   born_country  110908 non-null  object 
 5   NOC           145499 non-null  object 
 6   height_cm     106651 non-null  float64
 7   weight_kg     102070 non-null  float64
 8   died_date     33940 non-null   object 
dtypes: float64(2), object(7)
memory usage: 15.1+ MB


In [290]:
athlete.describe()

Unnamed: 0,height_cm,weight_kg
count,106651.0,102070.0
mean,176.333724,71.890996
std,10.380282,14.46554
min,127.0,25.0
25%,170.0,62.0
50%,176.0,70.0
75%,183.0,80.0
max,226.0,198.0


# Applying on an existance file. 'results.parquet'

In [291]:
# olympics_results = pd.read_csv('https://raw.githubusercontent.com/KeithGalli/complete-pandas-tutorial/refs/heads/master/data/results.parquet')   # Can't be decoded because it is large.
# olympics_results.head()