# Day 1 – Pandas Series Practice

Topics covered:
- Creating Series
- Indexing
- Basic operations
- Intro to data exploration


**SERIES = A PANDAS 1-DIMENSIONAL LABELED ARRAY THAT CAN HOLD ANY DATA TYPE. THINK OF IT LIKE A SINGLE COLUMN IN A SPREADSHEET {1-DIMENSIONAL}.**

**# Create df**

In [None]:
import pandas as pd

# Build a two-column table: each dictionary key becomes a column label and
# each list supplies that column's values, one entry per row.
names = ["Arley", "Maria"]
ages = [31, 28]
df = pd.DataFrame({"name": names, "age": ages})

# The printed table shows the default row index (0, 1) on the left.
print(df)

    name  age
0  Arley   31
1  Maria   28


**# Group By AVG**

In [None]:
import pandas as pd

# Four shipments; UPS appears twice (120 and 90), the other vendors once.
data = {
    "vendor": ["UPS", "FedEx", "UPS", "DHL"],
    "cost": [120, 150, 90, 200],
}
df = pd.DataFrame(data)

# Average cost per vendor: group the rows by vendor, pick the cost column,
# then take the mean of each group. UPS comes out as (120 + 90) / 2 = 105.
cost_by_vendor = df.groupby("vendor")["cost"]
cost_by_vendor.mean()

Unnamed: 0_level_0,cost
vendor,Unnamed: 1_level_1
DHL,200.0
FedEx,150.0
UPS,105.0


**# Update Values**

In [None]:
import pandas as pd

data = [100, 102, 103, 105, 106]  # plain Python list of values
labels = list("abcde")  # one index label per value: "a" .. "e"

series = pd.Series(data, index=labels)

# .loc selects by index label, so this overwrites the value stored under
# label "c" (103) with 200; every other entry is left as it was.
series.loc["c"] = 200

print(series)

a    100
b    102
c    200
d    105
e    106
dtype: int64


**# INDEX (.loc/.iloc)**

In [None]:
import pandas as pd

calories = {"Day 1": 1750, "Day 2": 2100, "Day 3": 1700}  # day label -> calories

# No explicit index is needed: the dictionary keys become the index labels.
series = pd.Series(calories)

# .loc looks a value up by its index label.
day_one = series.loc["Day 1"]
print(day_one)

1750


In [None]:
import pandas as pd

calories = {"Day 1": 1750, "Day 2": 2100, "Day 3": 1700}  # day label -> calories

# No explicit index is needed: the dictionary keys become the index labels.
series = pd.Series(calories)

# Add 300 calories to Day 3: read the current value by label, then write the
# increased value back under the same label (1700 -> 2000).
series.loc["Day 3"] = series.loc["Day 3"] + 300

print(series)

Day 1    1750
Day 2    2100
Day 3    2000
dtype: int64


In [None]:
import pandas as pd

calories = {"Day 1": 1750, "Day 2": 2100, "Day 3": 1700}  # day label -> calories

# No explicit index is needed: the dictionary keys become the index labels.
series = pd.Series(calories)

# .iloc selects by integer position. Positions start at 0, so position 1 is
# the SECOND entry ("Day 2"), not the first.
second_value = series.iloc[1]
print(second_value)

2100


In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, 28, 34]
df = pd.DataFrame({"name": names, "age": ages})

# On a DataFrame, .iloc[0] returns the first row (position 0) as a Series
# whose index is the column labels.
first_row = df.iloc[0]
print(first_row)

name    Arley
age        31
Name: 0, dtype: object


In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, 28, 34]
df = pd.DataFrame({"name": names, "age": ages})

# Position slicing: the end of the slice is excluded, so 1:2 keeps only the
# row at position 1 (Maria). The result is still a DataFrame, not a Series.
middle_rows = df.iloc[1:2]
print(middle_rows)

    name  age
1  Maria   28


**# Filter**

In [None]:
import pandas as pd

data = [100, 102, 103, 200, 289, 370]  # plain Python list of values
labels = list("abcdef")  # one index label per value: "a" .. "f"

series = pd.Series(data, index=labels)

# The comparison yields a True/False mask with one entry per value; indexing
# with that mask keeps only the entries greater than or equal to 200.
at_least_200 = series >= 200
print(series[at_least_200])

d    200
e    289
f    370
dtype: int64


In [None]:
import pandas as pd

calories = {"Day 1": 1750, "Day 2": 2100, "Day 3": 1700}  # day label -> calories

# No explicit index is needed: the dictionary keys become the index labels.
series = pd.Series(calories)

# Keep only the days whose calorie count was under 2000.
under_2000 = series < 2000
print(series[under_2000])

Day 1    1750
Day 3    1700
dtype: int64


**# Handling Missing Values**

In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, None, 34]  # Maria's age is missing
df = pd.DataFrame({"name": names, "age": ages})

# isna() returns a table of the same shape holding True where a value is
# missing and False everywhere else.
df.isna()

Unnamed: 0,name,age
0,False,False
1,False,True
2,False,False


In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, None, 34]  # Maria's age is missing
df = pd.DataFrame({"name": names, "age": ages})

# dropna() returns a copy without the rows that contain a missing value, so
# Maria's row (index 1) is gone. df itself is not changed by this call.
df.dropna()

Unnamed: 0,name,age
0,Arley,31.0
2,John,34.0


In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, None, 34]  # Maria's age is missing
df = pd.DataFrame({"name": names, "age": ages})

# fillna(0) returns a copy in which every missing value is replaced by 0, so
# Maria's age becomes 0.0. df itself is not changed by this call.
df.fillna(0)

Unnamed: 0,name,age
0,Arley,31.0
1,Maria,0.0
2,John,34.0


**# Data Types (dtype)**

In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, None, 34]  # Maria's age is missing
df = pd.DataFrame({"name": names, "age": ages})

# dtypes lists the data type of every column. "age" is float64 rather than
# an integer type because the missing entry is stored as NaN, a float.
df.dtypes

Unnamed: 0,0
name,object
age,float64


In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, None, 34]  # Maria's age is missing
df = pd.DataFrame({"name": names, "age": ages})

# astype() returns a converted copy, so the result is assigned back to df to
# keep it. "Int64" (capital I) is pandas' nullable integer type: ages become
# whole numbers and the missing entry is shown as <NA> instead of NaN.
target_types = {"age": "Int64"}
df = df.astype(target_types)

print(df)
print(df.dtypes)

    name   age
0  Arley    31
1  Maria  <NA>
2   John    34
name    object
age      Int64
dtype: object


**# Vectorized Operations**

In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, None, 34]  # Maria's age is missing
df = pd.DataFrame({"name": names, "age": ages})

# Multiplying the whole table applies "* 2" to every cell at once: the text
# in "name" is repeated ("ArleyArley"), the numbers in "age" are doubled,
# and the missing age stays missing.
df * 2

Unnamed: 0,name,age
0,ArleyArley,62.0
1,MariaMaria,
2,JohnJohn,68.0


In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, None, 34]  # Maria's age is missing
df = pd.DataFrame({"name": names, "age": ages})

# Add 10 to every age in one step. Maria's age is missing (NaN), and
# NaN + 10 is still NaN, so her entry stays empty.
age_column = df["age"]
age_column + 10

Unnamed: 0,age
0,41.0
1,
2,44.0


In [None]:
import pandas as pd

names = ["Arley", "Maria", "John"]
ages = [31, None, 34]  # Maria's age is missing
df = pd.DataFrame({"name": names, "age": ages})

# Replace the missing age with 0 first, then add 10 to every entry, so
# Maria ends up with 0 + 10 = 10 instead of staying NaN.
ages_filled = df["age"].fillna(0)
ages_filled + 10

Unnamed: 0,age
0,41.0
1,10.0
2,44.0


**# Sort**

In [2]:
import pandas as pd

values = [200, 50, 100]
labels = ["b", "c", "a"]
series = pd.Series(values, index=labels)

# Starting point: neither the values nor the index labels are in order.
print(series)

b    200
c     50
a    100
dtype: int64


In [4]:
import pandas as pd

values = [200, 50, 100]
labels = ["b", "c", "a"]
series = pd.Series(values, index=labels)

# sort_values() orders the entries from smallest to largest value; each
# value keeps its own label.
smallest_first = series.sort_values()
print(smallest_first)

# For largest to smallest, use series.sort_values(ascending=False).

c     50
a    100
b    200
dtype: int64


In [5]:
import pandas as pd

values = [200, 50, 100]
labels = ["b", "c", "a"]
series = pd.Series(values, index=labels)

# sort_index() orders the entries by their row labels (alphabetically here);
# each label keeps its own value.
by_label = series.sort_index()
print(by_label)

a    100
b    200
c     50
dtype: int64
