# CSV read and write with pandas

# Document

<table align="left">
    <tr>
        <th class="text-align:left">Title</th>
        <td class="text-align:left">CSV read and write in pandas</td>
    </tr>
    <tr>
        <th class="text-align:left">Last modified</th>
        <td class="text-align:left">2018-09-11</td>
    </tr>
    <tr>
        <th class="text-align:left">Author</th>
        <td class="text-align:left">Gilles Pilon <gillespilon13@gmail.com></td>
    </tr>
    <tr>
        <th class="text-align:left">Status</th>
        <td class="text-align:left">Active</td>
    </tr>
    <tr>
        <th class="text-align:left">Type</th>
        <td class="text-align:left">Jupyter notebook</td>
    </tr>
    <tr>
        <th class="text-align:left">Created</th>
        <td class="text-align:left">2018-09-10</td>
    </tr>
    <tr>
        <th class="text-align:left">File name</th>
        <td class="text-align:left">csv_read_write_pandas.ipynb</td>
    </tr>
    <tr>
        <th class="text-align:left">Other files required</th>
        <td class="text-align:left"></td>
    </tr>
</table>

# In brevi

The purpose of this Jupyter notebook is to illustrate how to read and write CSV files with pandas.

# Code

## Import libraries

In [1]:
# Import librairies
import pandas as pd

  return f(*args, **kwds)
  return f(*args, **kwds)


## Create dataframe

In [2]:
# Create a dataframe to write and read.
data = {'stagefirst':  ['Moe', 'Larry', 'Curly', 'Shemp', 'Curly Joe'],
        'stagelast':   ['Howard', 'Fine', '.', 'Howard', 'DeRita'],
        'realfirst':   ['Moses Harry', 'Louis', 'Jerome Lester', 'Samuel', 'Joseph'],
        'reallast':    ['Horwitz', 'Feinberg', 'Horwitz', 'Horwitz', 'Wardell'],
        'age':         [78, 73, 49, 60, 84],
        'height':      [114, 124, 131, '.', '.'],
        'weight':      ['25,000', '94,000', 157, 162, 170]
       }
df = pd.DataFrame.from_dict(data, orient='columns')
df

Unnamed: 0,stagefirst,stagelast,realfirst,reallast,age,height,weight
0,Moe,Howard,Moses Harry,Horwitz,78,114,25000
1,Larry,Fine,Louis,Feinberg,73,124,94000
2,Curly,.,Jerome Lester,Horwitz,49,131,157
3,Shemp,Howard,Samuel,Horwitz,60,.,162
4,Curly Joe,DeRita,Joseph,Wardell,84,.,170


## Write dataframe

In [3]:
# Write the dataframe to the current working directory.
df.to_csv('data.csv')

## Read dataframe

In [4]:
# Read a data file to a dataframe.
df = pd.read_csv('data.csv')
df

Unnamed: 0.1,Unnamed: 0,stagefirst,stagelast,realfirst,reallast,age,height,weight
0,0,Moe,Howard,Moses Harry,Horwitz,78,114,25000
1,1,Larry,Fine,Louis,Feinberg,73,124,94000
2,2,Curly,.,Jerome Lester,Horwitz,49,131,157
3,3,Shemp,Howard,Samuel,Horwitz,60,.,162
4,4,Curly Joe,DeRita,Joseph,Wardell,84,.,170


In [5]:
# Read a data file to a dataframe without column names.
df= pd.read_csv('data.csv', header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,,stagefirst,stagelast,realfirst,reallast,age,height,weight
1,0.0,Moe,Howard,Moses Harry,Horwitz,78,114,25000
2,1.0,Larry,Fine,Louis,Feinberg,73,124,94000
3,2.0,Curly,.,Jerome Lester,Horwitz,49,131,157
4,3.0,Shemp,Howard,Samuel,Horwitz,60,.,162
5,4.0,Curly Joe,DeRita,Joseph,Wardell,84,.,170


In [6]:
# Read a data file to a dataframe. Specify the column names.
df = pd.read_csv('data.csv',
                 names = ['ID', 'Stage First', 'Stage Last', 'Real First', 'Real Last', 'Age', 'Ht', 'Wt'])
df

Unnamed: 0,ID,Stage First,Stage Last,Real First,Real Last,Age,Ht,Wt
0,,stagefirst,stagelast,realfirst,reallast,age,height,weight
1,0.0,Moe,Howard,Moses Harry,Horwitz,78,114,25000
2,1.0,Larry,Fine,Louis,Feinberg,73,124,94000
3,2.0,Curly,.,Jerome Lester,Horwitz,49,131,157
4,3.0,Shemp,Howard,Samuel,Horwitz,60,.,162
5,4.0,Curly Joe,DeRita,Joseph,Wardell,84,.,170


In [7]:
# Read a data file to a dataframe. Specify the column labels.
# Skip the first row of the file, which contains column names.
df = pd.read_csv('data.csv',
                 skiprows=1,
                 names = ['ID', 'Stage First', 'Stage Last', 'Real First', 'Real Last', 'Age', 'Ht', 'Wt'])
df

Unnamed: 0,ID,Stage First,Stage Last,Real First,Real Last,Age,Ht,Wt
0,0,Moe,Howard,Moses Harry,Horwitz,78,114,25000
1,1,Larry,Fine,Louis,Feinberg,73,124,94000
2,2,Curly,.,Jerome Lester,Horwitz,49,131,157
3,3,Shemp,Howard,Samuel,Horwitz,60,.,162
4,4,Curly Joe,DeRita,Joseph,Wardell,84,.,170


In [8]:
# Read a data file to a dataframe. Specify the column labels.
# Skip the first row of the file, which contains column names.
# Set the index column to ID.
df = pd.read_csv('data.csv',
                 skiprows=1, index_col='ID',
                 names = ['ID', 'Stage First', 'Stage Last', 'Real First', 'Real Last', 'Age', 'Ht', 'Wt'])
df

Unnamed: 0_level_0,Stage First,Stage Last,Real First,Real Last,Age,Ht,Wt
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,Moe,Howard,Moses Harry,Horwitz,78,114,25000
1,Larry,Fine,Louis,Feinberg,73,124,94000
2,Curly,.,Jerome Lester,Horwitz,49,131,157
3,Shemp,Howard,Samuel,Horwitz,60,.,162
4,Curly Joe,DeRita,Joseph,Wardell,84,.,170


In [9]:
# Read a data file to a dataframe. Specify the column labels.
# Skip the first row of the file, which contains column names.
# Set the index column to first and last names.
df = pd.read_csv('data.csv', index_col=[1,2])
df.index.names = ['Stage First', 'Stage Last']
df.columns = ['ID', 'Real First', 'Real Last', 'Age', 'Ht', 'Wt']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,ID,Real First,Real Last,Age,Ht,Wt
Stage First,Stage Last,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Moe,Howard,0,Moses Harry,Horwitz,78,114,25000
Larry,Fine,1,Louis,Feinberg,73,124,94000
Curly,.,2,Jerome Lester,Horwitz,49,131,157
Shemp,Howard,3,Samuel,Horwitz,60,.,162
Curly Joe,DeRita,4,Joseph,Wardell,84,.,170


In [10]:
# Read a data file to a dataframe. Specify the column labels.
# Skip the first row of the file, which contains column names.
# Set the index column to first and last names.
df = pd.read_csv('data.csv', index_col=[1,2])
df.index.names = ['Stage First', 'Stage Last']
df.columns = ['ID', 'Real First', 'Real Last', 'Age', 'Ht', 'Wt']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,ID,Real First,Real Last,Age,Ht,Wt
Stage First,Stage Last,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Moe,Howard,0,Moses Harry,Horwitz,78,114,25000
Larry,Fine,1,Louis,Feinberg,73,124,94000
Curly,.,2,Jerome Lester,Horwitz,49,131,157
Shemp,Howard,3,Samuel,Horwitz,60,.,162
Curly Joe,DeRita,4,Joseph,Wardell,84,.,170


In [11]:
# Read a data file. Specify '.' as NaN.
df = pd.read_csv('data.csv', na_values=['.'])
df

Unnamed: 0.1,Unnamed: 0,stagefirst,stagelast,realfirst,reallast,age,height,weight
0,0,Moe,Howard,Moses Harry,Horwitz,78,114.0,25000
1,1,Larry,Fine,Louis,Feinberg,73,124.0,94000
2,2,Curly,,Jerome Lester,Horwitz,49,131.0,157
3,3,Shemp,Howard,Samuel,Horwitz,60,,162
4,4,Curly Joe,DeRita,Joseph,Wardell,84,,170


In [12]:
pd.isnull(df)

Unnamed: 0.1,Unnamed: 0,stagefirst,stagelast,realfirst,reallast,age,height,weight
0,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False
2,False,False,True,False,False,False,False,False
3,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,True,False


In [13]:
# Read a data file. Specify '.' and 'NA' as missing values.
notable = {'last': ['.', 'NA'], 'height': ['.']}
df = pd.read_csv('data.csv', na_values=notable)
df

Unnamed: 0.1,Unnamed: 0,stagefirst,stagelast,realfirst,reallast,age,height,weight
0,0,Moe,Howard,Moses Harry,Horwitz,78,114.0,25000
1,1,Larry,Fine,Louis,Feinberg,73,124.0,94000
2,2,Curly,.,Jerome Lester,Horwitz,49,131.0,157
3,3,Shemp,Howard,Samuel,Horwitz,60,,162
4,4,Curly Joe,DeRita,Joseph,Wardell,84,,170


In [14]:
# Read a data file. Interpret strings around numbers as 'thousands' separators.
df = pd.read_csv('data.csv', thousands=',')
df

Unnamed: 0.1,Unnamed: 0,stagefirst,stagelast,realfirst,reallast,age,height,weight
0,0,Moe,Howard,Moses Harry,Horwitz,78,114,25000
1,1,Larry,Fine,Louis,Feinberg,73,124,94000
2,2,Curly,.,Jerome Lester,Horwitz,49,131,157
3,3,Shemp,Howard,Samuel,Horwitz,60,.,162
4,4,Curly Joe,DeRita,Joseph,Wardell,84,.,170
