# Reading and Writing CSV and Excel Files

In [1]:
import pandas as pd

# Read CSV

In [14]:
# Load the whole file; by default the first line of the CSV is used as the header row
df = pd.read_csv("Data_Set\\stock_data.csv")
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [5]:
# Sometimes a CSV file has unnecessary rows at the top that you don't want to read.
# There are two ways to handle this:

# skiprows = number     -> skiprows=1 (skip one row from the top)
# header = number       -> header=1 (the header is on the row with zero-based index 1, i.e. the second line)

# NOTE: this sample file has no extra rows at the top, so skipping one row drops the
# real header and the GOOGL row ends up being used as the header.
df = pd.read_csv("Data_Set\\stock_data.csv", skiprows=1)
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [6]:
# Same result with header=1: the line at zero-based index 1 becomes the header
# and the line above it is ignored
df = pd.read_csv("Data_Set\\stock_data.csv", header=1)
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [7]:
# Sometimes a CSV file has no header row (column names) at all. In that case:

# header=None  -> pandas generates integer column names automatically (0, 1, 2, 3, ...)

# NOTE: this sample file does have a header row, so here it is read as the first data row.
df = pd.read_csv("Data_Set\\stock_data.csv", header=None)
df

Unnamed: 0,0,1,2,3,4
0,tickers,eps,revenue,price,people
1,GOOGL,27.82,87,845,larry page
2,WMT,4.61,484,65,n.a.
3,MSFT,-1,85,64,bill gates
4,RIL,not available,50,1023,mukesh ambani
5,TATA,5.6,-1,n.a.,ratan tata


In [8]:
# We can also provide the column names ourselves

# names=["--", "--", "--"]

# NOTE: because header=None is passed and this sample file has a header row, the original
# header line is kept as the first data row. Passing header=0 together with names
# would replace the existing header instead.
df = pd.read_csv("Data_Set\\stock_data.csv", header=None, names=["TICKER","EPS","REVENUE","PRICE","PEOPLE"])
df

Unnamed: 0,TICKER,EPS,REVENUE,PRICE,PEOPLE
0,tickers,eps,revenue,price,people
1,GOOGL,27.82,87,845,larry page
2,WMT,4.61,484,65,n.a.
3,MSFT,-1,85,64,bill gates
4,RIL,not available,50,1023,mukesh ambani
5,TATA,5.6,-1,n.a.,ratan tata


In [11]:
# Read only a limited number of rows from the CSV (nrows=3 reads the first three data rows)

df = pd.read_csv("Data_Set\\stock_data.csv", nrows=3)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1.0,85,64,bill gates


In [13]:
# Sometimes a few values in the data are placeholders such as "not available" or "n.a.".
# We can convert these values to NaN while reading.

# na_values=["--", "--"] means: while reading the CSV file, turn every occurrence of the
# listed values into NaN

df = pd.read_csv("Data_Set\\stock_data.csv", na_values=["not available","n.a."])
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


In [31]:
# We can also treat a value as missing only in specific columns instead of
# replacing it across the whole dataframe.

# In the dataframe above, the revenue in the last row (TATA, index 4) is -1, but revenue can't be -1.
# If we added -1 to the na_values list, the -1 in the eps column would be converted to NaN as well.
# To avoid that, pass a dictionary that maps each column name to its own list of
# NaN markers instead of a single list.

df = pd.read_csv("Data_Set\\stock_data.csv", na_values={
    'eps' : ["not available","n.a."],
    'revenue' : ["not available","n.a.",-1],
    'price' : ["not available","n.a."],
    'people' : ["not available","n.a."]
    
})
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


# Write CSV

In [23]:
# We can write a CSV file using to_csv()

# By default it also writes the index

df.to_csv("Data_Set\\new.csv")

In [24]:
# Read the file back: the index that was written shows up as an extra unnamed column
df = pd.read_csv("Data_Set\\new.csv")
df

Unnamed: 0.1,Unnamed: 0,tickers,eps,revenue,price,people
0,0,GOOGL,27.82,87.0,845.0,larry page
1,1,WMT,4.61,484.0,65.0,
2,2,MSFT,-1.0,85.0,64.0,bill gates
3,3,RIL,,50.0,1023.0,mukesh ambani
4,4,TATA,5.6,,,ratan tata


In [26]:
# If we don't want to write the index, pass index=False

df.to_csv("Data_Set\\new.csv",index=False)

In [27]:
# Read the file back to inspect what was written
df = pd.read_csv("Data_Set\\new.csv")
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


In [29]:
# Write only specific columns by listing them in the columns parameter

df.to_csv("Data_Set\\new.csv",index=False,columns=['tickers','eps'])

In [30]:
# Read the file back: only the selected columns were written
df = pd.read_csv("Data_Set\\new.csv")
df

Unnamed: 0,tickers,eps
0,GOOGL,27.82
1,WMT,4.61
2,MSFT,-1.0
3,RIL,
4,TATA,5.6


In [32]:
# Sometimes we don't want to write the header row to the CSV: pass header=False

df.to_csv("Data_Set\\new.csv",index=False,header=False)    

In [33]:
# Read the file back. The file now has no header row, so with the default header
# handling the first data row (GOOGL) is used as the column names; passing
# header=None would keep it as data.
df = pd.read_csv("Data_Set\\new.csv")
df

Unnamed: 0,GOOGL,27.82,87.0,845.0,larry page
0,WMT,4.61,484.0,65.0,
1,MSFT,-1.0,85.0,64.0,bill gates
2,RIL,,50.0,1023.0,mukesh ambani
3,TATA,5.6,,,ratan tata


# Read Excel