## Pandas Review

In [114]:
import pandas as pd


In [115]:
#Series = A pandas 1-dimensional labeled array, that can hold any data type, think of it as a like column in a spread sheet

In [116]:
data = [100, 102, 104, 200, 202]

In [117]:
series = pd.Series(data, index = ["a","b","c","d","e"]) #You can pass in a list, series, or tuple 

In [118]:
print(series.loc["a"]) #all data is arranged in a single column,  they are all given index, 0,1,2..., and the dtype is int64 

100


In [119]:
#Location by lable 
series.loc["c"] = 200

In [120]:
print(series)

a    100
b    102
c    200
d    200
e    202
dtype: int64


In [121]:
#iloc integer position
print(series.iloc[0])

100


In [122]:
print(series[series >= 200]) #index aren't treated as a thing you can functionally play with 

c    200
d    200
e    202
dtype: int64


In [123]:
#Python dict
cal = {"Day 1": 1750, "Day 2": 2100, "Day 3": 1700}

In [124]:
series = pd.Series(cal)

In [125]:
print(series)

Day 1    1750
Day 2    2100
Day 3    1700
dtype: int64


In [126]:
print(series.loc["Day 3"] )

1700


In [127]:
series.loc["Day 3"] += 500

In [128]:
print(series[series >=2000])

Day 2    2100
Day 3    2200
dtype: int64


## Data Frames

##### Data frames are tabular structure

In [129]:
data = {'Name': ["Spongebob", "Patrick", "Squidward"],
        "Age": [30, 35, 50]           
    }

In [130]:
df = pd.DataFrame(data, index=['Employee 1', "Employee 2", "Employee 3"])


In [131]:
print(df)

                 Name  Age
Employee 1  Spongebob   30
Employee 2    Patrick   35
Employee 3  Squidward   50


In [132]:
print(df.loc["Employee 1"])

Name    Spongebob
Age            30
Name: Employee 1, dtype: object


In [133]:
print(df.iloc[2])

Name    Squidward
Age            50
Name: Employee 3, dtype: object


In [134]:
# Add new column
df["Job"] = ["Cook", "NaN", "Cashier"]

In [135]:
print(df)

                 Name  Age      Job
Employee 1  Spongebob   30     Cook
Employee 2    Patrick   35      NaN
Employee 3  Squidward   50  Cashier


In [136]:
#Add a new row 
new_row = pd.DataFrame([{"Name": "Sandy", "Age": 28, "Job": "Engineer"} ], index= ["Employee 4"])

In [137]:
df= pd.concat([df, new_row])

In [138]:
df

Unnamed: 0,Name,Age,Job
Employee 1,Spongebob,30,Cook
Employee 2,Patrick,35,
Employee 3,Squidward,50,Cashier
Employee 4,Sandy,28,Engineer


In [139]:
df.iloc

<pandas.core.indexing._iLocIndexer at 0x1342dec10>

In [140]:
# Add new rows 

new_rows = pd.DataFrame([{"Name": "Eugene", "Age": 60, "Job": "Manager"},
                        {"Name": "Jordan", "Age": 21,"Job": "Student" }
                         ], index= ["Employee 5","Employee 6"])


In [141]:
df= pd.concat([df, new_rows])

In [142]:
df

Unnamed: 0,Name,Age,Job
Employee 1,Spongebob,30,Cook
Employee 2,Patrick,35,
Employee 3,Squidward,50,Cashier
Employee 4,Sandy,28,Engineer
Employee 5,Eugene,60,Manager
Employee 6,Jordan,21,Student


# Importing 

In [None]:
#df = pd.read_csv("My data") #Import csv

In [143]:
#print(df.to_string)#all rows are included 

In [144]:
#df = pd.read_json("mydata") #read json file 

# Selection

###### Selection by column 
###### pd.read_csv("data.csv", index_col = "Name")


In [None]:
# print(df["Name"].to_string()) #To print 

In [None]:
#print(df[[1,2,3],["Name","Height","Weight"]].to.string())

# Selection By Rows

In [None]:
#print(df.loc[0]) selection by row 

In [153]:
#print(df.loc["Pika"]) #selection to see which has the name PIKA

In [154]:
#print(df.loc["Pika","Ruka","Luka"]) 

In [155]:
#print(df.loc["Pika":"The dog","Ruka","Luka"]) 

In [156]:
#print(df.iloc[0:11])#selection of rows 0 to 11

In [None]:
# print(df.iloc[0:11:2, 0:3]) #0-3 for columns, 0-11 for rows 

In [None]:
#pokemon = inputs("Enter a Pokemon name : ")
#try: 
#    print(df.loc[pokemon])
# except Keyerror: 
# print(f"{pokemon}")

# Filtering

###### Keeps rows that match a certain condition

In [157]:
print(df)

                 Name  Age       Job
Employee 1  Spongebob   30      Cook
Employee 2    Patrick   35       NaN
Employee 3  Squidward   50   Cashier
Employee 4      Sandy   28  Engineer
Employee 5     Eugene   60   Manager
Employee 6     Jordan   21   Student


In [159]:
#tall_pokemon = df[df["Height"] >= 2]
# print(tall_pokemon)

In [161]:
# heavy_pokemon = df[df["Weight"] > 100 ]
#print(heavy_pokemon)

In [162]:
# legendary_poke = df[df["Legendary"] ==1 ]
# print(legendary_poke)

In [163]:
# legendary_poke = df[df["Legendary"] == True ]
# print(legendary_poke)

In [164]:
# water_pokemon = df[df["Type1"] = "Water"]
# print(water_pokemon)

In [None]:
# water_pokemon = df[(df["Type1"] = "Water") |
#(df["Type2"] == "Water")]

In [None]:
# ff_pokemon = df[(df["Type1"]== "Fire") & 
## (df["Type2"]== "Flying")]

## Review logic 

# Aggregation

In [167]:
df

Unnamed: 0,Name,Age,Job
Employee 1,Spongebob,30,Cook
Employee 2,Patrick,35,
Employee 3,Squidward,50,Cashier
Employee 4,Sandy,28,Engineer
Employee 5,Eugene,60,Manager
Employee 6,Jordan,21,Student


In [171]:
print(df.mean(numeric_only = True))
# Each Numerical columns you have the mean 

Age    37.333333
dtype: float64


In [173]:
print(df.sum(numeric_only = True))
# Each Numerical columns you have the sum

Age    224
dtype: int64


In [174]:
print(df.min(numeric_only = True))
# Each Numerical columns you have the min

Age    21
dtype: int64


In [175]:
print(df.max(numeric_only = True))
# Each Numerical columns you have the max

Age    60
dtype: int64


In [177]:
print(df.count())
# Each  columns you have the row count

Name    6
Age     6
Job     6
dtype: int64


### Single Columns

In [179]:
print(df["Age"].mean())
# Each Height columns you have the mean 

37.333333333333336


In [None]:
print(df["Age"].count())
# Each Height columns you have the count 

6


In [None]:
print(df["Age"].max())
# Each Height columns you have the max 

60


In [None]:
print(df["Age"].max())
# Each Height columns you have the mean 

### Group By

In [183]:
df

Unnamed: 0,Name,Age,Job
Employee 1,Spongebob,30,Cook
Employee 2,Patrick,35,
Employee 3,Squidward,50,Cashier
Employee 4,Sandy,28,Engineer
Employee 5,Eugene,60,Manager
Employee 6,Jordan,21,Student


In [None]:
group = df.groupby("Job")


<bound method GroupBy.mean of <pandas.core.groupby.generic.DataFrameGroupBy object at 0x174e1b650>>


In [None]:
#print(group["Height"].min())

In [190]:
#print(group["Height"].max())

In [191]:
#print(group["Height"].mean())

In [192]:
#print(group["Height"].sum())

In [193]:
#print(group["Height"].count())

# Data Cleaning

In [195]:
# dr.drop(columns = ["Legendary", "No"])

#### Missing Data

In [196]:
# df = df.dropna(subset=["Type2"])

In [198]:
#print(df.to_string)
# df = df.fillna({"Type2": "None"})

#### Inconsistant Values

In [199]:
# df["Type1"] = df["Type1"].replace({"Glass": "GRASS"})

In [None]:
# df["Type1"] = df["Type1"].replace({"Glass": "GRASS",
#"Fire": "Fire",
#"Green": "GREEn"
#})

##### Standardize Text

In [200]:
# df["Name"] = df["Name"].str.lower()

### Fix Data Type

In [None]:
# df["Legendary"] = df["Legendary"].astype(bool)

### Remove Duplicate Values

In [201]:
# df = df.drop_diplicates()