- Pandas
- Series
- Data Frame

In [2]:
import pandas as pd
from typing import List

example_list: List[int] = [1,2,3,4,5]
type(example_list)

list

In [4]:
example_list.pop()

5

In [5]:
example_list

[1, 2, 3, 4]

In [8]:
s1 = pd.Series(example_list)
s1.index = ["A","B","C","D"]
s1

A    1
B    2
C    3
D    4
dtype: int64

In [10]:
s1["A"] = 23

In [11]:
s1

A    23
B     2
C     3
D     4
dtype: int64

In [13]:
s1.index = range(4)
s1

0    23
1     2
2     3
3     4
dtype: int64

In [17]:
# Create df from dictionary
names = ["Nate", "Rebecca", "Edwin", "Preston"]
ages = [39, 40, 11, 7]
year = ["Senior", "Junior", "Sophomore", "Freshman"]

students = {
    "Name": names,
    "Age": ages,
    "Year": year
}

students_df = pd.DataFrame(students)
students_df

Unnamed: 0,Name,Age,Year
0,Nate,39,Senior
1,Rebecca,40,Junior
2,Edwin,11,Sophomore
3,Preston,7,Freshman


In [41]:
# Create df from list of rows
import random

players = ["Messi", "Mbappe", "Ronaldo", "Sala"]

cols = ["Name", "Goals/Game", "Games"]
data = [
    [player, random.random(), round(random.random() * 1000)]
    for player in players
    ]

players_df = pd.DataFrame(data, columns = cols)
players_df

Unnamed: 0,Name,Goals/Game,Games
0,Messi,0.902738,865
1,Mbappe,0.798157,681
2,Ronaldo,0.522613,978
3,Sala,0.18199,551


In [26]:
# create / update columns
players_df["Name"]

0      Messi
1     Mbappe
2    Ronaldo
3       Sala
Name: Name, dtype: object

In [23]:
players_df["Name"][0]

'Messi'

In [27]:
players_df["New_col"] = 0
players_df

Unnamed: 0,Name,Goals/Game,Games,New_col
0,Messi,0.019466,330,0
1,Mbappe,0.534021,865,0
2,Ronaldo,0.431901,748,0
3,Sala,0.142125,149,0


In [38]:
players_df["New_col"] = [i + 1 for i in range(len(players_df))]
players_df

Unnamed: 0,Name,Goals/Game,Games,New_col
0,Messi,0.696681,395,1
1,Mbappe,0.907922,145,2
2,Ronaldo,0.081138,921,3
3,Sala,0.073665,742,4


In [42]:
players_df["New_col"] = players_df["Goals/Game"] * players_df["Games"]
players_df

Unnamed: 0,Name,Goals/Game,Games,New_col
0,Messi,0.902738,865,780.868532
1,Mbappe,0.798157,681,543.545105
2,Ronaldo,0.522613,978,511.115648
3,Sala,0.18199,551,100.276683


In [43]:
# inline vs declarative
players_df = players_df.rename(columns={"New_col": "Goals"})
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals
0,Messi,0.902738,865,780.868532
1,Mbappe,0.798157,681,543.545105
2,Ronaldo,0.522613,978,511.115648
3,Sala,0.18199,551,100.276683


In [45]:
players_df["Name"] = players_df["Name"].str.lower()
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals
0,messi,0.902738,865,780.868532
1,mbappe,0.798157,681,543.545105
2,ronaldo,0.522613,978,511.115648
3,sala,0.18199,551,100.276683


In [48]:
players_df[["Name", "Goals"]]

Unnamed: 0,Name,Goals
0,messi,780.868532
1,mbappe,543.545105
2,ronaldo,511.115648
3,sala,100.276683


In [49]:
# .loc[]

players_df = players_df.set_index("Name")
players_df

Unnamed: 0_level_0,Goals/Game,Games,Goals
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
messi,0.902738,865,780.868532
mbappe,0.798157,681,543.545105
ronaldo,0.522613,978,511.115648
sala,0.18199,551,100.276683


In [52]:
players_df.loc["mbappe", "Games"]

np.int64(681)

In [53]:
players_df = players_df.reset_index()
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals
0,messi,0.902738,865,780.868532
1,mbappe,0.798157,681,543.545105
2,ronaldo,0.522613,978,511.115648
3,sala,0.18199,551,100.276683


In [54]:
players_df.loc[0]

Name               messi
Goals/Game      0.902738
Games                865
Goals         780.868532
Name: 0, dtype: object

In [55]:
players_df.loc[1:2, "Games":"Goals"]

Unnamed: 0,Games,Goals
1,681,543.545105
2,978,511.115648


In [56]:
players_df.loc[[0,3,2]]

Unnamed: 0,Name,Goals/Game,Games,Goals
0,messi,0.902738,865,780.868532
3,sala,0.18199,551,100.276683
2,ronaldo,0.522613,978,511.115648


In [None]:
players_df.loc[players_df["Goals/Game"] > 0.5]

Unnamed: 0,Name,Goals/Game,Games,Goals
0,messi,0.902738,865,780.868532
1,mbappe,0.798157,681,543.545105
2,ronaldo,0.522613,978,511.115648


In [58]:
players_df.loc[players_df["Goals/Game"] > 0.5, "Name"]

0      messi
1     mbappe
2    ronaldo
Name: Name, dtype: object

In [60]:
players_df.loc[players_df["Goals/Game"] > 0.5, "Name"] = players_df["Name"].str.upper()
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals
0,MESSI,0.902738,865,780.868532
1,MBAPPE,0.798157,681,543.545105
2,RONALDO,0.522613,978,511.115648
3,sala,0.18199,551,100.276683


In [61]:
players_df["Team"] = ["FC Barca", "PSG", "PSG", "Liverpool"]
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals,Team
0,MESSI,0.902738,865,780.868532,FC Barca
1,MBAPPE,0.798157,681,543.545105,PSG
2,RONALDO,0.522613,978,511.115648,PSG
3,sala,0.18199,551,100.276683,Liverpool


In [64]:
players_df.loc[players_df["Team"] == "PSG", "Team"] = "Paris St. Germain"
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals,Team
0,MESSI,0.902738,865,780.868532,FC Barca
1,MBAPPE,0.798157,681,543.545105,Paris St. Germain
2,RONALDO,0.522613,978,511.115648,Paris St. Germain
3,sala,0.18199,551,100.276683,Liverpool
