# Pandas Playground

## Basics

### DataFrames from Files

In [None]:
from pathlib import Path

import pandas as pd

# Folder holding the pokemon_data.* files. Kept in one place so the notebook
# can be pointed at another machine's copy by editing a single line.
DATA_DIR = Path("/Users/vladyslavshutkevych/Desktop/Tools/Pandas/data")

# Load pokemon_data from three file formats: CSV, Excel and tab-delimited text.
# Note: Jupyter renders only the last expression of a cell, so the first two
# .head() calls below produce no visible output.
pokemon = pd.read_csv(DATA_DIR / "pokemon_data.csv")
pokemon.head()

df1 = pd.read_excel(DATA_DIR / "pokemon_data.xlsx")
df1.head()

# delimiter="\t" tells read_csv the fields are separated by tabs
df2 = pd.read_csv(DATA_DIR / "pokemon_data.txt", delimiter="\t")
df2.head()


### Reading Data

In [None]:
# Column labels of the frame
df1.columns

# Column access
df1["Name"].iloc[:5]                      # first five values of one column
df1.loc[:, ["Name", "Type 1", "HP"]]      # several columns at once
df1["Name"]                               # a whole column

# Row access (position-based)
df1.iloc[1]    # the second row, returned as a Series
df1.iloc[:4]   # the first four rows
# Row-by-row iteration is possible too:
# for index, row in df1.iterrows():
#     print(index, row["Name"])

# Rows selected by a condition on a column
df1[df1["Type 1"].eq("Fire")]

# A single value at (row position, column position)
df1.iat[2, 1]

### Sorting/Describing Data

In [None]:
# Sort by name, A -> Z (sort_values returns a new frame; df1 is unchanged)
df1.sort_values(by='Name')
# Sort by name, Z -> A
df1.sort_values(by='Name', ascending=False)
# Sort by 'Type 1' ascending, then by 'HP' descending within each type;
# one flag per sort key
df1.sort_values(by=['Type 1', 'HP'], ascending=[True, False])

### Changing Data

In [None]:
# Add a per-row 'Total' of the stat columns 'HP' through 'Speed'
# (axis=1 sums horizontally, across columns).
# The columns are selected by label rather than by position (iloc[:, 4:10]):
# once 'Total' has been moved to position 4 below, a positional slice would
# pick up 'Total' itself on a re-run of this cell and produce a wrong sum.
df1['Total'] = df1.loc[:, 'HP':'Speed'].sum(axis=1)
# Once assigned, 'Total' stays on df1 even if the line above is later
# commented out; drop it explicitly when it is no longer wanted:
# df1 = df1.drop(columns=['Total'])

# Move 'Total' so that it sits right after the first four columns.
columns = list(df1.columns.values)
# df1 = df1[['Total', 'HP', 'Defense']] # bad way: hard-codes names and drops every other column
# Locate 'Total' by name instead of assuming it is the last column, so the
# reorder gives the same result no matter how many times the cell is run.
remaining = [name for name in columns if name != 'Total']
df1 = df1[remaining[:4] + ['Total'] + remaining[4:]]
df1.head(5)


### Saving Data

In [None]:
# Export examples, kept commented out; uncomment a line to write the file.
# df1.to_csv('data/modified.csv') # save file (the row index is written as the first column)

# df1.to_csv('data/modified.csv', index=False) # ignore indices (do not write the row index)
# df1.to_excel('data/modified.xlsx', index=False)
# df1.to_csv('data/modified.txt', index=False, sep='\t') # tab-separated text


### Filtering Data

In [None]:
# Boolean-mask filtering: keep the rows whose 'Type 1' is Grass
df1[df1['Type 1'].eq('Grass')]
# Several conditions combined with & (all must hold for a row to be kept)
new_df = df1[df1['Type 1'].eq('Grass') & df1['Type 2'].eq('Poison') & df1['HP'].gt(70)]

# Filtering keeps the original row labels; renumber them from 0 and discard
# the old labels (drop=True)
new_df = new_df.reset_index(drop=True)
# Equivalent form that modifies new_df in place instead of returning a copy
new_df.reset_index(inplace=True, drop=True)
new_df.head(5)

# Substring filtering on a text column
mega = df1[df1['Name'].str.contains('Mega')]        # names containing 'Mega'
mega_not = df1[~df1['Name'].str.contains('Mega')]   # ~ negates the mask: all other names
mega.head(5)


In [None]:
import re

# str.contains treats the pattern as a regular expression by default;
# re.IGNORECASE makes every match below case-insensitive.

# 'Type 1' contains "fire" or "grass"
mega = df1[df1['Type 1'].str.contains('fire|grass', flags=re.IGNORECASE)]

# Name contains "pi" anywhere (the trailing [a-z]* may match zero letters)
mega = df1[df1['Name'].str.contains('pi[a-z]*', flags=re.IGNORECASE)]
# Name starts with "pi" (^ anchors the match at the beginning)
mega = df1[df1['Name'].str.contains('^pi[a-z]*', flags=re.IGNORECASE)]
# Name has "pi" followed only by letters up to the end of the string ($ anchor)
mega = df1[df1['Name'].str.contains('pi[a-z]*$', flags=re.IGNORECASE)]


### Conditional Changes

In [None]:
# .loc[row_mask, column_label] = value overwrites one column on the matching rows.
# Rename the 'Fire' value of 'Type 1' to 'Flamer'
df1.loc[df1['Type 1'].eq('Fire'), 'Type 1'] = 'Flamer'
# Set 'Legendary' to True on every row whose 'Type 1' is Grass
df1.loc[df1['Type 1'].eq('Grass'), 'Legendary'] = True
df1


Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,True
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,True
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,True
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,True
4,4,Charmander,Flamer,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


### Creating DataFrames, Index/Columns, Basic Functionality

In [None]:


# Build a small 3x3 frame with explicit column labels and row labels (index).
df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]], columns=["A", "B", "C"], index=["x","y","z"])
df.head() # first 5 rows by default (all rows here, since df has only 3)
df.head(1) # first row only
df.tail(2) # bottom 2 rows
df.index.to_list() # row labels as a plain Python list
df.columns # column labels of this frame

# Info
df.info() # concise info overview (printed)
df.describe() # summary of statistics
df.size # num of elements (rows * columns)

# Unique values
df.nunique() # number of distinct values per column
df["A"].unique() # distinct values of column A (the call parentheses are required)

df.shape # (rows, columns)
