In [21]:
import pandas as pd
import numpy as np
import yaml
# Load the UK sample contacts CSV; the "index" section further down works on this frame.
data = pd.read_csv('uk-500.csv')

# Record which pandas version produced the outputs. A bare expression is only
# rendered when it is the last statement of the cell, so it has to come last.
pd.__version__

In [7]:
# Shell escape: have JupyterLab print its application, user-settings and workspaces directories.
!jupyter lab paths

Application directory:   /home/jeffye/anaconda3/envs/py38/share/jupyter/lab
User Settings directory: /home/jeffye/.jupyter/lab/user-settings
Workspaces directory: /home/jeffye/.jupyter/lab/workspaces


# create dataframe 

## Create a DataFrame in different ways

In [4]:
# Row-oriented input: each inner list is one row, [name, age].
# (Named `rows` rather than `data` so the CSV frame loaded in the first cell
# is not overwritten by a plain list.)
rows = [['tom', 10], ['nick', 15], ['juli', 14]]

# Build the frame, labelling the two positional columns explicitly.
df = pd.DataFrame(rows, columns=['Name', 'Age'])

# Display the frame itself. `df.loc` is only the label-based indexer accessor;
# evaluating it bare shows the indexer object, not the data.
df

Unnamed: 0,Name,Age
0,tom,10
1,nick,15
2,juli,14


## Creating a DataFrame from a dict of ndarrays/lists

In [7]:
# Column-oriented input: every key becomes a column label and its list
# supplies that column's values.
data = dict(
    Name=['Tom', 'nick', 'krish', 'jack'],
    Age=[20, 21, 19, 18],
)

# Hand the mapping straight to the constructor; no index is given, so rows
# are numbered from 0.
df = pd.DataFrame(data)

# Preview the first rows.
df.head()

Unnamed: 0,Name,Age
0,Tom,20
1,nick,21
2,krish,19
3,jack,18


## Creating an indexed DataFrame using arrays

In [8]:
# Column-oriented input: two columns of four values each.
data = {
    'Name': ['Tom', 'Jack', 'nick', 'juli'],
    'marks': [99, 98, 95, 90],
}

# Custom row labels, one per record, used in place of the default 0..n-1 index.
rank_labels = ['rank1', 'rank2', 'rank3', 'rank4']
df = pd.DataFrame(data, index=rank_labels)

# Show the last four rows -- for this frame that is every row.
df.tail(4)

Unnamed: 0,Name,marks
rank1,Tom,99
rank2,Jack,98
rank3,nick,95
rank4,juli,90


## Creating Dataframe from list of dicts

In [12]:
# Row-oriented input: one dict per row. The second record has no 'd' key,
# so pandas fills that cell with NaN.
data = [
    dict(a=1, b=2, c=3, d=8),
    dict(a=10, b=20, c=30),
]

# Build the frame; the keys of the records become the column labels.
df = pd.DataFrame(data)

# Display the result.
df

Unnamed: 0,a,b,c,d
0,1,2,3,8.0
1,10,20,30,


## Creating DataFrame using zip() function.

In [14]:
# Two parallel lists: the i-th name belongs with the i-th age.
Name = ['tom', 'krish', 'nick', 'juli']
Age = [25, 30, 26, 22]

# zip() pairs the lists element-wise; materialise the pairs as a list of
# (name, age) tuples, one tuple per row.
list_of_tuples = list(zip(Name, Age))

# Build the frame from the row tuples, labelling the two columns.
df = pd.DataFrame(list_of_tuples, columns=['Name', 'Age'])

# Display the result.
df

Unnamed: 0,Name,Age
0,tom,25
1,krish,30
2,nick,26
3,juli,22


## Creating DataFrame from Dicts of series.

In [16]:
# Both columns use the same row labels, so define the labels once.
labels = ['a', 'b', 'c', 'd']

# Mapping of column name -> labelled Series; each Series carries its own index.
d = {
    column: pd.Series([10, 20, 30, 40], index=labels)
    for column in ('one', 'two')
}

# Assemble the frame from the labelled Series.
df = pd.DataFrame(d)

# Display the result.
df

Unnamed: 0,one,two
a,10,10
b,20,20
c,30,30
d,40,40


## create Series

In [5]:
# Integers with one missing value (np.nan); the resulting Series is float64.
values = [1, 3, 5, np.nan, 6, 8]
s = pd.Series(values)

# Display the Series.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

# Check for NaN in rows and columns

In [31]:
# The numbers 1..10 with three NaN gaps (at positions 5, 8 and 12). Both
# columns hold the same values, so the list is written once.
# (Not bound to `data`, so the CSV frame loaded in the first cell survives.)
numbers_with_gaps = [1, 2, 3, 4, 5, np.nan, 6, 7, np.nan, 8, 9, 10, np.nan]

df = pd.DataFrame({
    'set_of_numbers': numbers_with_gaps,
    'column2': numbers_with_gaps,
})

# End the cell with the frame itself so the notebook renders it as a table
# rather than the plain-text dump that print() produces.
df

    set_of_numbers  column2
0              1.0      1.0
1              2.0      2.0
2              3.0      3.0
3              4.0      4.0
4              5.0      5.0
5              NaN      NaN
6              6.0      6.0
7              7.0      7.0
8              NaN      NaN
9              8.0      8.0
10             9.0      9.0
11            10.0     10.0
12             NaN      NaN


## Check for NaN under a single DataFrame column:

In [24]:
# True if at least one entry of the column is missing.
df['set_of_numbers'].isna().values.any()

True

In [28]:
# Number of missing entries in the column (each True in the mask counts as 1).
df['set_of_numbers'].isna().sum()

3

In [35]:
# Element-wise missing-value mask for the whole frame; preview its first rows.
df.isna().head()

Unnamed: 0,set_of_numbers,column2
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False


## Ways to fill NaN
* https://vimsky.com/zh-tw/examples/usage/python-pandas.DataFrame.fillna.html

# copy
## copy columns and also keep index

In [8]:
# Small 3x3 frame with custom row labels.
# (The input dict is not bound to `data`, so the CSV frame loaded in the
# first cell is not overwritten right before the "index" section uses it.)
column_values = {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [7, 8, 9]}
df = pd.DataFrame(column_values, index=['rank1', 'rank2', 'rank3'])
print(df.head())

# Take an explicit, independent copy of two columns. The row labels come
# along with the selection; .copy() makes it unambiguous that later edits to
# `selected_columns` can never touch `df` (and avoids SettingWithCopyWarning).
selected_columns = df[["col1", "col2"]].copy()

       col1  col2  col3
rank1     1     4     7
rank2     2     5     8
rank3     3     6     9


In [9]:
selected_columns

Unnamed: 0,col1,col2
rank1,1,4
rank2,2,5
rank3,3,6


## Copy rows by concatenating lists of index labels (convert each Index with `tolist()` first)

In [40]:
# Labels of the first two rows, as a plain list so that `+` concatenates them.
first_two = selected_columns.index[:2].tolist()

# Look those labels up twice over, so each of the two rows appears twice in
# the result, then overwrite col3 with a constant.
new_df = df.loc[first_two + first_two]
new_df['col3'] = 9
new_df

Unnamed: 0,col1,col2,col3
rank1,1,4,9
rank2,2,5,9
rank1,1,4,9
rank2,2,5,9


# index

## set a column as index

In [25]:
# Reload the contacts CSV here: earlier sections rebind `data` to plain
# lists/dicts, so after a top-to-bottom run it would no longer be a DataFrame
# and `.head()` would fail.
data = pd.read_csv('uk-500.csv')
data.head()

Unnamed: 0,first_name,last_name,company_name,address,city,county,postal,phone1,phone2,email,web
0,Aleshia,Tomkiewicz,Alan D Rosenburg Cpa Pc,14 Taylor St,St. Stephens Ward,Kent,CT2 7PP,01835-703597,01944-369967,atomkiewicz@hotmail.com,http://www.alandrosenburgcpapc.co.uk
1,Evan,Zigomalas,Cap Gemini America,5 Binney St,Abbey Ward,Buckinghamshire,HP11 2AX,01937-864715,01714-737668,evan.zigomalas@gmail.com,http://www.capgeminiamerica.co.uk
2,France,Andrade,"Elliott, John W Esq",8 Moor Place,East Southbourne and Tuckton W,Bournemouth,BH6 3BE,01347-368222,01935-821636,france.andrade@hotmail.com,http://www.elliottjohnwesq.co.uk
3,Ulysses,Mcwalters,"Mcmahan, Ben L",505 Exeter Rd,Hawerby cum Beesby,Lincolnshire,DN36 5RP,01912-771311,01302-601380,ulysses@hotmail.com,http://www.mcmahanbenl.co.uk
4,Tyisha,Veness,Champagne Room,5396 Forth Street,Greets Green and Lyng Ward,West Midlands,B70 9DT,01547-429341,01290-367248,tyisha.veness@hotmail.com,http://www.champagneroom.co.uk


In [26]:
# Promote `last_name` from a column to the row index so rows can be looked up
# by surname. The guard keeps the cell safe to re-run: once the column has
# been moved into the index it is no longer in `data.columns`, and calling
# set_index again would raise KeyError.
if "last_name" in data.columns:
    data = data.set_index("last_name")
data.head()

Unnamed: 0_level_0,first_name,company_name,address,city,county,postal,phone1,phone2,email,web
last_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Tomkiewicz,Aleshia,Alan D Rosenburg Cpa Pc,14 Taylor St,St. Stephens Ward,Kent,CT2 7PP,01835-703597,01944-369967,atomkiewicz@hotmail.com,http://www.alandrosenburgcpapc.co.uk
Zigomalas,Evan,Cap Gemini America,5 Binney St,Abbey Ward,Buckinghamshire,HP11 2AX,01937-864715,01714-737668,evan.zigomalas@gmail.com,http://www.capgeminiamerica.co.uk
Andrade,France,"Elliott, John W Esq",8 Moor Place,East Southbourne and Tuckton W,Bournemouth,BH6 3BE,01347-368222,01935-821636,france.andrade@hotmail.com,http://www.elliottjohnwesq.co.uk
Mcwalters,Ulysses,"Mcmahan, Ben L",505 Exeter Rd,Hawerby cum Beesby,Lincolnshire,DN36 5RP,01912-771311,01302-601380,ulysses@hotmail.com,http://www.mcmahanbenl.co.uk
Veness,Tyisha,Champagne Room,5396 Forth Street,Greets Green and Lyng Ward,West Midlands,B70 9DT,01547-429341,01290-367248,tyisha.veness@hotmail.com,http://www.champagneroom.co.uk


In [32]:
# Label-based lookup on the row index: fetch the row labelled 'Veness', all columns.
data.loc['Veness', :]

first_name                              Tyisha
company_name                    Champagne Room
address                      5396 Forth Street
city                Greets Green and Lyng Ward
county                           West Midlands
postal                                 B70 9DT
phone1                            01547-429341
phone2                            01290-367248
email                tyisha.veness@hotmail.com
web             http://www.champagneroom.co.uk
Name: Veness, dtype: object

In [36]:
# Number of rows in the frame.
data.shape[0]

500

#  merge

# join