### How to read and write CSV files using pandas

In [9]:
import pandas as pd
# Read the whole CSV file into a DataFrame; no extra parsing options are passed.
df = pd.read_csv('stock_data.csv')

print(df.columns)    # print the column labels of the DataFrame
# A bare expression on the last line makes the notebook display the DataFrame.
df

Index(['tickers', 'eps', 'revenue', 'price', 'people'], dtype='object')


Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [10]:
# skiprows=1 skips the first line of the file before parsing. It is meant for
# files that carry an extra line above the header. This file has no such line,
# so the header line itself is skipped and the first data row (GOOGL, ...) is
# used as the column names, as the output below shows.
df1 = pd.read_csv(
    'stock_data.csv',
    skiprows=1,
)
df1

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [12]:
# nrows=3 reads only the first three data rows of the CSV into the DataFrame
# (the header line is not counted as one of them).
df2 = pd.read_csv(
    'stock_data.csv',
    nrows=3,
)
df2

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1.0,85,64,bill gates


In [15]:
# Useful for cleaning up messy data: na_values is given a list here, so each
# listed marker is treated as a missing value in whichever column it appears.
df4 = pd.read_csv(
    'stock_data.csv',
    na_values=["not available", "n.a."],
)
df4

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


### Cleaning up messy data (Data Munging / Data Wrangling)

In [17]:
# na_values can also be a dictionary keyed by column name instead of a list,
# giving each column its own missing-value markers. Here -1 counts as missing
# in 'revenue' only, so the -1 in 'eps' is kept as a real value.
df5 = pd.read_csv(
    'stock_data.csv',
    na_values={
        'eps': ['not available'],
        'revenue': [-1],
        'price': ['n.a.'],
        'people': ['n.a.'],
    },
)
df5

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


### Writing back to CSV

In [20]:
# Write df5, the cleaned DataFrame, to a new CSV file.
df5.to_csv('new.csv',index=False)   # index=False: the row index is not written to the file

In [21]:
# List the column names of df5 before writing only a few of them to a CSV file.
df5.columns


Index(['tickers', 'eps', 'revenue', 'price', 'people'], dtype='object')

In [25]:
# Write only the selected columns. header=False also leaves the column-name
# row out, so the new CSV file has no header.
df5.to_csv(
    'new2.csv',
    columns=['tickers', 'eps', 'people'],
    index=False,
    header=False,
)

### Using converters to transform column values while reading a CSV

In [34]:
def convert_eps_cell(cell):
    """Converter for cells of the 'eps' column.

    Returns None when the cell is exactly 'not available'; any other cell
    value is returned unchanged.
    """
    return None if cell == 'not available' else cell

def convert_people_cell(cell):
    """Converter for cells of the 'people' column.

    Returns 'Sam Walton' when the cell is exactly 'n.a.'; any other cell
    value is returned unchanged.
    """
    is_missing = cell == 'n.a.'
    return 'Sam Walton' if is_missing else cell

# Pass both functions through the converters argument: it maps a column name
# to the function that read_csv applies to the cells of that column.
df6 = pd.read_csv(
    'stock_data.csv',
    converters={
        'eps': convert_eps_cell,
        'people': convert_people_cell,
    },
)
df6

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,Sam Walton
2,MSFT,-1.0,85,64,bill gates
3,RIL,,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [35]:
# The converted DataFrame can be written to a new CSV file as well;
# index=False is passed so the row index is left out of the file.
df6.to_csv('new3.csv',index=False)