# Pandas methods testing

### Creating, loading, and saving data

In [68]:
# Creation: raw records as a column-name -> list-of-values mapping
people = dict(
    first=['ginger', 'baddie', 'mickey', 'meggy'],
    last=['bread', 'baldie', 'mann', 'muffin'],
    email=[
        'gingerbread@email.com',
        'baddiebaldie@email.com',
        'mickeymann@email.com',
        'meggymuffin@email.com',
    ],
    sex=['male', 'female', 'male', 'female'],
    age=[18, 27, 33, 15],
)

In [69]:
# Building a DataFrame from the `people` dict (each key becomes a column)
import pandas as pd

people_dataframe = pd.DataFrame(people)
people_dataframe

Unnamed: 0,first,last,email,sex,age
0,ginger,bread,gingerbread@email.com,male,18
1,baddie,baldie,baddiebaldie@email.com,female,27
2,mickey,mann,mickeymann@email.com,male,33
3,meggy,muffin,meggymuffin@email.com,female,15


In [70]:
# Saving to csv
# NOTE(review): assumes the `data/` directory already exists — confirm before a fresh run
people_dataframe.to_csv('data/people_dataframe.csv')

In [71]:
# Loading from csv file; index_col=0 uses the first csv column as the row index.
# The forward-slash path is portable across operating systems and is the same
# path the dataframe was saved to.
people_dataframe = pd.read_csv('data/people_dataframe.csv', index_col=0)
people_dataframe

Unnamed: 0,first,last,email,sex,age
0,ginger,bread,gingerbread@email.com,male,18
1,baddie,baldie,baddiebaldie@email.com,female,27
2,mickey,mann,mickeymann@email.com,male,33
3,meggy,muffin,meggymuffin@email.com,female,15


### Displaying the dataframe and information about it

In [72]:
# Displaying the first 5 rows of the dataframe
people_dataframe.head(n=5)

Unnamed: 0,first,last,email,sex,age
0,ginger,bread,gingerbread@email.com,male,18
1,baddie,baldie,baddiebaldie@email.com,female,27
2,mickey,mann,mickeymann@email.com,male,33
3,meggy,muffin,meggymuffin@email.com,female,15


In [73]:
# Setting a pandas display option: the 'display.max_columns' limit is set to 20
pd.set_option('display.max_columns', 20)

In [74]:
# Showing the number of rows and columns as a (rows, columns) tuple
people_dataframe.shape

(4, 5)

In [75]:
# Showing each column's name, non-null count, and dtype, plus index and memory usage
people_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 0 to 3
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   first   4 non-null      object
 1   last    4 non-null      object
 2   email   4 non-null      object
 3   sex     4 non-null      object
 4   age     4 non-null      int64 
dtypes: int64(1), object(4)
memory usage: 192.0+ bytes


In [76]:
# Summary statistics (count, mean, std, min, quartiles, max); only the numeric 'age' column is reported here
people_dataframe.describe()

Unnamed: 0,age
count,4.0
mean,23.25
std,8.261356
min,15.0
25%,17.25
50%,22.5
75%,28.5
max,33.0


### Selecting rows and columns

In [77]:
# Selecting a single column by its label
people_dataframe['email']

0     gingerbread@email.com
1    baddiebaldie@email.com
2      mickeymann@email.com
3     meggymuffin@email.com
Name: email, dtype: object

In [78]:
# Selecting multiple columns by passing a list of column labels
people_dataframe[['first', 'last', 'email']]

Unnamed: 0,first,last,email
0,ginger,bread,gingerbread@email.com
1,baddie,baldie,baddiebaldie@email.com
2,mickey,mann,mickeymann@email.com
3,meggy,muffin,meggymuffin@email.com


In [79]:
# Selecting a single row by integer position (position 0 = first row)
people_dataframe.iloc[0]

first                   ginger
last                     bread
email    gingerbread@email.com
sex                       male
age                         18
Name: 0, dtype: object

In [80]:
# Selecting multiple rows by integer position, restricted to the column at position 2 ('email')
people_dataframe.iloc[[0,1], 2]

0     gingerbread@email.com
1    baddiebaldie@email.com
Name: email, dtype: object

In [81]:
# Selecting multiple rows and multiple columns, both by integer position
people_dataframe.iloc[[0,1], [2,3]]

Unnamed: 0,email,sex
0,gingerbread@email.com,male
1,baddiebaldie@email.com,female


In [82]:
# Selecting multiple rows and columns by label (the row labels here are the integers 0 and 1)
people_dataframe.loc[[0,1], ['email', 'sex']]

Unnamed: 0,email,sex
0,gingerbread@email.com,male
1,baddiebaldie@email.com,female


In [83]:
# Getting a single value by row label and column label
people_dataframe.at[0, 'first']

'ginger'

### Filtering the data frame

In [84]:
# Building a boolean mask (age of 20 or under) and keeping only the matching rows
filt = people_dataframe['age'] <= 20
people_dataframe.loc[filt]

Unnamed: 0,first,last,email,sex,age
0,ginger,bread,gingerbread@email.com,male,18
3,meggy,muffin,meggymuffin@email.com,female,15


In [85]:
# Applying the boolean mask `filt` to the rows while keeping only the listed columns
people_dataframe.loc[filt, ['first', 'last', 'age']]

Unnamed: 0,first,last,age
0,ginger,bread,18
3,meggy,muffin,15


### Setting, resetting, sorting, and using the index

In [86]:
# Showing the dataframe's row index
people_dataframe.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [87]:
# Setting index: the 'email' column becomes the row labels.
# The result is reassigned instead of using inplace=True, so the name is rebound to a new frame.
people_dataframe = people_dataframe.set_index('email')

In [88]:
# Dataframe after the index was set ('email' is now the row index, not a regular column)
people_dataframe

Unnamed: 0_level_0,first,last,sex,age
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gingerbread@email.com,ginger,bread,male,18
baddiebaldie@email.com,baddie,baldie,female,27
mickeymann@email.com,mickey,mann,male,33
meggymuffin@email.com,meggy,muffin,female,15


In [89]:
# Selecting a row by its index label (an email address, since 'email' is the index here)
people_dataframe.loc['mickeymann@email.com']

first    mickey
last       mann
sex        male
age          33
Name: mickeymann@email.com, dtype: object

In [90]:
# Resetting index: the current index goes back to being a regular column and rows are numbered from 0.
# The result is reassigned instead of using inplace=True, so the name is rebound to a new frame.
people_dataframe = people_dataframe.reset_index()

In [91]:
# Dataframe after the index reset ('email' is a regular column again, now in first position)
people_dataframe

Unnamed: 0,email,first,last,sex,age
0,gingerbread@email.com,ginger,bread,male,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [92]:
# Sorting rows by index in descending order (the sorted result is displayed, not assigned)
people_dataframe.sort_index(ascending=False)

Unnamed: 0,email,first,last,sex,age
3,meggymuffin@email.com,meggy,muffin,female,15
2,mickeymann@email.com,mickey,mann,male,33
1,baddiebaldie@email.com,baddie,baldie,female,27
0,gingerbread@email.com,ginger,bread,male,18


### Modifying data within dataframe

In [93]:
# Renaming columns by assigning a complete list of names, one per existing column in order
people_dataframe.columns = ['email', 'first', 'last', 'sex', 'age']
people_dataframe

Unnamed: 0,email,first,last,sex,age
0,gingerbread@email.com,ginger,bread,male,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [94]:
# Renaming columns using an {old name: new name} mapping; unlisted columns keep their names.
# The result is reassigned instead of using inplace=True, so the name is rebound to a new frame.
people_dataframe = people_dataframe.rename(columns={'first' : 'first name', 'last' : 'last name'})
people_dataframe

Unnamed: 0,email,first name,last name,sex,age
0,gingerbread@email.com,ginger,bread,male,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [95]:
# Modifying column names with the .str accessor: replace spaces with underscores
people_dataframe.columns = people_dataframe.columns.str.replace(' ', '_')
people_dataframe

Unnamed: 0,email,first_name,last_name,sex,age
0,gingerbread@email.com,ginger,bread,male,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [96]:
# Upper-casing every column name with a list comprehension
people_dataframe.columns = [column_name.upper() for column_name in people_dataframe.columns]
people_dataframe

Unnamed: 0,EMAIL,FIRST_NAME,LAST_NAME,SEX,AGE
0,gingerbread@email.com,ginger,bread,male,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [97]:
# Lower-casing the column names with the .str accessor
people_dataframe.columns = people_dataframe.columns.str.lower()
people_dataframe

Unnamed: 0,email,first_name,last_name,sex,age
0,gingerbread@email.com,ginger,bread,male,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [98]:
# Overwriting the whole row labelled 0; the values are listed in column order
people_dataframe.loc[0] = ['breadginger@email.com', 'bread', 'ginger', 'female', 81]
people_dataframe

Unnamed: 0,email,first_name,last_name,sex,age
0,breadginger@email.com,bread,ginger,female,81
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [99]:
# Modifying only the listed columns of the row labelled 0
people_dataframe.loc[0, ['first_name', 'last_name']] = ['ginger', 'bread']
people_dataframe

Unnamed: 0,email,first_name,last_name,sex,age
0,breadginger@email.com,ginger,bread,female,81
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [100]:
# Modifying a single value, addressed by row label and column label
people_dataframe.loc[0, 'age'] = 18
people_dataframe

Unnamed: 0,email,first_name,last_name,sex,age
0,breadginger@email.com,ginger,bread,female,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [101]:
# Modifying values with a filter: build a boolean mask, then assign to the
# 'first_name' column of every matching row
filt = people_dataframe['first_name'] == 'mickey'
people_dataframe.loc[filt, 'first_name'] = 'mouse'
people_dataframe

Unnamed: 0,email,first_name,last_name,sex,age
0,breadginger@email.com,ginger,bread,female,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mouse,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [102]:
# Capitalizing first names with the .str accessor
# (the result is only displayed; it is not assigned back to the dataframe)
people_dataframe['first_name'].str.capitalize()

0    Ginger
1    Baddie
2     Mouse
3     Meggy
Name: first_name, dtype: object

In [103]:
# Upper-casing each email with the apply method
# (the result is only displayed; it is not assigned back to the dataframe)
people_dataframe['email'].apply(str.upper)

0     BREADGINGER@EMAIL.COM
1    BADDIEBALDIE@EMAIL.COM
2      MICKEYMANN@EMAIL.COM
3     MEGGYMUFFIN@EMAIL.COM
Name: email, dtype: object

In [104]:
# Capitalizing every value in all columns except 'age' with applymap
# (the result is only displayed; it is not assigned back to the dataframe)
# NOTE(review): newer pandas releases deprecate applymap in favour of DataFrame.map — confirm against the installed version
people_dataframe.loc[:, people_dataframe.columns != 'age'].applymap(str.capitalize)

Unnamed: 0,email,first_name,last_name,sex
0,Breadginger@email.com,Ginger,Bread,Female
1,Baddiebaldie@email.com,Baddie,Baldie,Female
2,Mickeymann@email.com,Mouse,Mann,Male
3,Meggymuffin@email.com,Meggy,Muffin,Female


In [105]:
# Replacing multiple values in one column by passing an {old value: new value} mapping to replace
people_dataframe['first_name'] = people_dataframe['first_name'].replace({'mouse' : 'mickey', 'meggy' : 'chungus'})
people_dataframe

Unnamed: 0,email,first_name,last_name,sex,age
0,breadginger@email.com,ginger,bread,female,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,chungus,muffin,female,15


### Adding and removing rows and columns from the data frame

In [106]:
# Creating a new 'full_name' column by joining first and last name with a space
people_dataframe['full_name'] = people_dataframe['first_name'] + ' ' + people_dataframe['last_name']
people_dataframe

Unnamed: 0,email,first_name,last_name,sex,age,full_name
0,breadginger@email.com,ginger,bread,female,18,ginger bread
1,baddiebaldie@email.com,baddie,baldie,female,27,baddie baldie
2,mickeymann@email.com,mickey,mann,male,33,mickey mann
3,meggymuffin@email.com,chungus,muffin,female,15,chungus muffin


In [107]:
# Removing the listed columns.
# The result is reassigned instead of using inplace=True, so the name is rebound to a new frame.
people_dataframe = people_dataframe.drop(columns=['first_name', 'last_name'])
people_dataframe

Unnamed: 0,email,sex,age,full_name
0,breadginger@email.com,female,18,ginger bread
1,baddiebaldie@email.com,female,27,baddie baldie
2,mickeymann@email.com,male,33,mickey mann
3,meggymuffin@email.com,female,15,chungus muffin


In [108]:
# Creating new columns by splitting 'full_name' on whitespace;
# expand=True spreads the pieces across separate columns
people_dataframe[['first_name', 'last_name']] = people_dataframe['full_name'].str.split(expand=True)
people_dataframe

Unnamed: 0,email,sex,age,full_name,first_name,last_name
0,breadginger@email.com,female,18,ginger bread,ginger,bread
1,baddiebaldie@email.com,female,27,baddie baldie,baddie,baldie
2,mickeymann@email.com,male,33,mickey mann,mickey,mann
3,meggymuffin@email.com,female,15,chungus muffin,chungus,muffin


In [109]:
# Adding a new 'has_hands' column with assign (one value per existing row);
# assign's result is bound back to people_dataframe
people_dataframe = people_dataframe.assign(has_hands=['got eaten', 'yes', 'smol', 'chungus'])
people_dataframe

Unnamed: 0,email,sex,age,full_name,first_name,last_name,has_hands
0,breadginger@email.com,female,18,ginger bread,ginger,bread,got eaten
1,baddiebaldie@email.com,female,27,baddie baldie,baddie,baldie,yes
2,mickeymann@email.com,male,33,mickey mann,mickey,mann,smol
3,meggymuffin@email.com,female,15,chungus muffin,chungus,muffin,chungus


In [110]:
# Adding dataframe to another dataframe
people2 = {
    'email' : ['bigpotato@email.com'],
    'sex' : ['male'],
    'age' : [25],
    'full_name' : ['big potato'],
    'first_name' : ['big'],
    'last_name' : ['potato'],
    'has_hands' : ['potat'],
}
people_dataframe_2 = pd.DataFrame(people2)

# pd.concat is used instead of DataFrame.append, which is deprecated and
# unavailable in newer pandas releases; ignore_index=True renumbers the
# combined rows from 0 so the new row gets the next integer label.
people_dataframe = pd.concat([people_dataframe, people_dataframe_2], ignore_index=True)
people_dataframe

Unnamed: 0,email,sex,age,full_name,first_name,last_name,has_hands
0,breadginger@email.com,female,18,ginger bread,ginger,bread,got eaten
1,baddiebaldie@email.com,female,27,baddie baldie,baddie,baldie,yes
2,mickeymann@email.com,male,33,mickey mann,mickey,mann,smol
3,meggymuffin@email.com,female,15,chungus muffin,chungus,muffin,chungus
4,bigpotato@email.com,male,25,big potato,big,potato,potat


In [111]:
# Adding a new row under the label len(index); values are listed in column order
# NOTE(review): this only appends when the index is 0..n-1 with no gaps — confirm before reuse
people_dataframe.loc[len(people_dataframe.index)] = ['sammiacm@email.com', 'female', 45, 'sammi acm', 'sammi', 'acm', 'schmol']
people_dataframe

Unnamed: 0,email,sex,age,full_name,first_name,last_name,has_hands
0,breadginger@email.com,female,18,ginger bread,ginger,bread,got eaten
1,baddiebaldie@email.com,female,27,baddie baldie,baddie,baldie,yes
2,mickeymann@email.com,male,33,mickey mann,mickey,mann,smol
3,meggymuffin@email.com,female,15,chungus muffin,chungus,muffin,chungus
4,bigpotato@email.com,male,25,big potato,big,potato,potat
5,sammiacm@email.com,female,45,sammi acm,sammi,acm,schmol


In [112]:
# Removing rows by index label: every label from 4 up to the current row count is dropped.
# The result is only displayed; it is not assigned back to people_dataframe.
people_dataframe.drop(index=list(range(4, len(people_dataframe.index))))

Unnamed: 0,email,sex,age,full_name,first_name,last_name,has_hands
0,breadginger@email.com,female,18,ginger bread,ginger,bread,got eaten
1,baddiebaldie@email.com,female,27,baddie baldie,baddie,baldie,yes
2,mickeymann@email.com,male,33,mickey mann,mickey,mann,smol
3,meggymuffin@email.com,female,15,chungus muffin,chungus,muffin,chungus


In [113]:
# Removing rows that satisfy either condition: collect the labels of the
# matching rows, then drop those labels (the result is only displayed)
filt = (people_dataframe['first_name'] == 'big') | (people_dataframe['last_name'] == 'acm')
matching_labels = people_dataframe.loc[filt].index
people_dataframe.drop(index=matching_labels)

Unnamed: 0,email,sex,age,full_name,first_name,last_name,has_hands
0,breadginger@email.com,female,18,ginger bread,ginger,bread,got eaten
1,baddiebaldie@email.com,female,27,baddie baldie,baddie,baldie,yes
2,mickeymann@email.com,male,33,mickey mann,mickey,mann,smol
3,meggymuffin@email.com,female,15,chungus muffin,chungus,muffin,chungus


### Sorting data

In [114]:
# Sorting rows by one column, in ascending order (the default)
people_dataframe.sort_values(by='full_name')

Unnamed: 0,email,sex,age,full_name,first_name,last_name,has_hands
1,baddiebaldie@email.com,female,27,baddie baldie,baddie,baldie,yes
4,bigpotato@email.com,male,25,big potato,big,potato,potat
3,meggymuffin@email.com,female,15,chungus muffin,chungus,muffin,chungus
0,breadginger@email.com,female,18,ginger bread,ginger,bread,got eaten
2,mickeymann@email.com,male,33,mickey mann,mickey,mann,smol
5,sammiacm@email.com,female,45,sammi acm,sammi,acm,schmol


In [115]:
# Sorting by multiple columns: 'first_name' ascending, then 'age' descending
# (one ascending flag per column in `by`)
people_dataframe.sort_values(by=['first_name', 'age'], ascending=[True, False])

Unnamed: 0,email,sex,age,full_name,first_name,last_name,has_hands
1,baddiebaldie@email.com,female,27,baddie baldie,baddie,baldie,yes
4,bigpotato@email.com,male,25,big potato,big,potato,potat
3,meggymuffin@email.com,female,15,chungus muffin,chungus,muffin,chungus
0,breadginger@email.com,female,18,ginger bread,ginger,bread,got eaten
2,mickeymann@email.com,male,33,mickey mann,mickey,mann,smol
5,sammiacm@email.com,female,45,sammi acm,sammi,acm,schmol


In [116]:
# Sorting the values of a single column (a Series) in ascending order, the default
people_dataframe['email'].sort_values()

1    baddiebaldie@email.com
4       bigpotato@email.com
0     breadginger@email.com
3     meggymuffin@email.com
2      mickeymann@email.com
5        sammiacm@email.com
Name: email, dtype: object

### Grouping and aggregating data