# Pandas methods testing

### Creating, loading, and saving data

In [38]:
# Creation
# Column-oriented sample data: each keyword is a column name and each list
# holds that column's values in row order (four rows in total).
people = dict(
    first=['ginger', 'baddie', 'mickey', 'meggy'],
    last=['bread', 'baldie', 'mann', 'muffin'],
    email=[
        'gingerbread@email.com',
        'baddiebaldie@email.com',
        'mickeymann@email.com',
        'meggymuffin@email.com',
    ],
    sex=['male', 'female', 'male', 'female'],
    age=[18, 27, 33, 15],
)

In [39]:
# Building a dataframe from the dict: keys become column names,
# list items become the rows.
import pandas as pd

people_dataframe = pd.DataFrame(data=people)
people_dataframe

Unnamed: 0,first,last,email,sex,age
0,ginger,bread,gingerbread@email.com,male,18
1,baddie,baldie,baddiebaldie@email.com,female,27
2,mickey,mann,mickeymann@email.com,male,33
3,meggy,muffin,meggymuffin@email.com,female,15


In [40]:
# Saving to csv
# to_csv does not create missing parent folders, so make sure the target
# directory exists first; exist_ok=True keeps this cell safe to re-run.
import os

os.makedirs('data', exist_ok=True)
people_dataframe.to_csv('data/people_dataframe.csv')

In [41]:
# Loading from csv file
# Read from the same forward-slash path the file was saved to. A backslash is
# a path separator only on Windows, so r'data\...' is not found elsewhere.
# index_col=0 uses the first csv column (the saved row index) as the index.
people_dataframe = pd.read_csv('data/people_dataframe.csv', index_col=0)
people_dataframe

Unnamed: 0,first,last,email,sex,age
0,ginger,bread,gingerbread@email.com,male,18
1,baddie,baldie,baddiebaldie@email.com,female,27
2,mickey,mann,mickeymann@email.com,male,33
3,meggy,muffin,meggymuffin@email.com,female,15


### Displaying the dataframe and information about it

In [42]:
# Previewing the top of the dataframe (at most 5 rows)
people_dataframe.head(n=5)

Unnamed: 0,first,last,email,sex,age
0,ginger,bread,gingerbread@email.com,male,18
1,baddie,baldie,baddiebaldie@email.com,female,27
2,mickey,mann,mickeymann@email.com,male,33
3,meggy,muffin,meggymuffin@email.com,female,15


In [43]:
# Limiting how many columns pandas renders when a dataframe is displayed
pd.options.display.max_columns = 5

In [44]:
# Showing the number of rows and columns as a (rows, columns) tuple
people_dataframe.shape

(4, 5)

In [45]:
# Showing per-column information: non-null counts and dtypes,
# plus the index range and memory usage
people_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 0 to 3
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   first   4 non-null      object
 1   last    4 non-null      object
 2   email   4 non-null      object
 3   sex     4 non-null      object
 4   age     4 non-null      int64 
dtypes: int64(1), object(4)
memory usage: 192.0+ bytes


In [46]:
# Summary statistics (count, mean, std, min, quartiles, max);
# only the numeric `age` column is summarised here
people_dataframe.describe()

Unnamed: 0,age
count,4.0
mean,23.25
std,8.261356
min,15.0
25%,17.25
50%,22.5
75%,28.5
max,33.0


### Selecting rows and columns

In [47]:
# Selecting a single column by name (the result is a Series)
people_dataframe['email']

0     gingerbread@email.com
1    baddiebaldie@email.com
2      mickeymann@email.com
3     meggymuffin@email.com
Name: email, dtype: object

In [48]:
# Selecting multiple columns by passing a list of column names
people_dataframe[['first', 'last', 'email']]

Unnamed: 0,first,last,email
0,ginger,bread,gingerbread@email.com
1,baddie,baldie,baddiebaldie@email.com
2,mickey,mann,mickeymann@email.com
3,meggy,muffin,meggymuffin@email.com


In [49]:
# Selecting a single row by integer position
people_dataframe.iloc[0]

first                   ginger
last                     bread
email    gingerbread@email.com
sex                       male
age                         18
Name: 0, dtype: object

In [50]:
# Selecting multiple rows and one column by integer position
# (rows 0 and 1 of the column at position 2, i.e. 'email')
people_dataframe.iloc[[0,1], 2]

0     gingerbread@email.com
1    baddiebaldie@email.com
Name: email, dtype: object

In [51]:
# Selecting multiple rows and multiple columns by integer position
people_dataframe.iloc[[0,1], [2,3]]

Unnamed: 0,email,sex
0,gingerbread@email.com,male
1,baddiebaldie@email.com,female


In [52]:
# Selecting multiple rows and columns by label
# (the row labels are the integers 0 and 1 here)
people_dataframe.loc[[0,1], ['email', 'sex']]

Unnamed: 0,email,sex
0,gingerbread@email.com,male
1,baddiebaldie@email.com,female


In [53]:
# Getting a single value by row label and column name
people_dataframe.at[0, 'first']

'ginger'

### Filtering the dataframe

In [54]:
# Building a boolean mask (age of 20 or less) and keeping only the rows
# where the mask is True
age_filter = people_dataframe['age'].le(20)
people_dataframe.loc[age_filter, :]

Unnamed: 0,first,last,email,sex,age
0,ginger,bread,gingerbread@email.com,male,18
3,meggy,muffin,meggymuffin@email.com,female,15


In [55]:
# Filtering rows with the boolean mask while keeping only the listed columns
people_dataframe.loc[age_filter, ['first', 'last', 'age']]

Unnamed: 0,first,last,age
0,ginger,bread,18
3,meggy,muffin,15


### Setting, resetting, sorting, and using the index

In [56]:
# Showing the current row index
people_dataframe.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [57]:
# Setting index
# Rebind the name to the frame returned by set_index instead of mutating in
# place with inplace=True; the 'email' column becomes the row labels.
people_dataframe = people_dataframe.set_index('email')

In [58]:
# Dataframe after index set: 'email' is now the row index
people_dataframe

Unnamed: 0_level_0,first,last,sex,age
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gingerbread@email.com,ginger,bread,male,18
baddiebaldie@email.com,baddie,baldie,female,27
mickeymann@email.com,mickey,mann,male,33
meggymuffin@email.com,meggy,muffin,female,15


In [59]:
# Selecting a row by index label
people_dataframe.loc['mickeymann@email.com']

first    mickey
last       mann
sex        male
age          33
Name: mickeymann@email.com, dtype: object

In [60]:
# Resetting index
# Rebind the name to the frame returned by reset_index instead of mutating in
# place with inplace=True; the default integer index is restored and the
# former index ('email') is kept as a regular column.
people_dataframe = people_dataframe.reset_index()

In [61]:
# Dataframe after index reset: 'email' is a regular column again
people_dataframe

Unnamed: 0,email,first,last,sex,age
0,gingerbread@email.com,ginger,bread,male,18
1,baddiebaldie@email.com,baddie,baldie,female,27
2,mickeymann@email.com,mickey,mann,male,33
3,meggymuffin@email.com,meggy,muffin,female,15


In [62]:
# Sorting rows by index in descending order
# (the result is only displayed, not assigned back to people_dataframe)
people_dataframe.sort_index(ascending=False)

Unnamed: 0,email,first,last,sex,age
3,meggymuffin@email.com,meggy,muffin,female,15
2,mickeymann@email.com,mickey,mann,male,33
1,baddiebaldie@email.com,baddie,baldie,female,27
0,gingerbread@email.com,ginger,bread,male,18
