In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("stock_data.csv")
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [3]:
df = pd.read_csv("stock_data.csv", skiprows=1)
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [4]:
df = pd.read_csv("stock_data.csv", header=1) # skiprows and header are kind of same
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [8]:
# setting explicit headers
df = pd.read_csv("stock_data.csv", header=None)
df

Unnamed: 0,0,1,2,3,4
0,tickers,eps,revenue,price,people
1,GOOGL,27.82,87,845,larry page
2,WMT,4.61,484,65,n.a.
3,MSFT,-1,85,64,bill gates
4,RIL,not available,50,1023,mukesh ambani
5,TATA,5.6,-1,n.a.,ratan tata


In [9]:
df = pd.read_csv("stock_data.csv", header=None, names=['Ticker', 'EPS', 'Revenue', 'People'])
df

Unnamed: 0,Ticker,EPS,Revenue,People
tickers,eps,revenue,price,people
GOOGL,27.82,87,845,larry page
WMT,4.61,484,65,n.a.
MSFT,-1,85,64,bill gates
RIL,not available,50,1023,mukesh ambani
TATA,5.6,-1,n.a.,ratan tata


In [10]:
# Number of rows of a file to read. (Read only specified number of rows)
df = pd.read_csv("stock_data.csv", nrows=2) # Fetches only two rows
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.


In [12]:
df = pd.read_csv("stock_data.csv")
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [14]:
# Handling NA, not avialble, n.a, N/A etc kind of data
# Will be replaced by NaN - na_values argument can be scalar, str, list-like, or dict, default None
df = pd.read_csv("stock_data.csv", na_values=["n.a.", 'not available'])
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


In [17]:
# Can explicitly mention what should be filled for Na or any values
df = pd.read_csv("stock_data.csv", na_values={
                    'eps': ['not avaliable'],
                    'revenue': [-1],
                    'people': ['not available', 'n.a.', 'ratan tata']
                })
df # For price column still n.a exists and for 'ratan tata' NaN value is filled

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845,larry page
1,WMT,4.61,484.0,65,
2,MSFT,-1,85.0,64,bill gates
3,RIL,not available,50.0,1023,mukesh ambani
4,TATA,5.6,,n.a.,


In [19]:
# A new cs file will be genrated in the current folder
df.to_csv('final_data.csv', index=False)

In [20]:
df.to_csv('final_data.csv', header=False)

In [21]:
df = pd.read_excel("stock_data.xlsx", "Sheet1")
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [22]:
# explicit fill value via method definition
def convert_people_cell(cell):
    """ If there are any n.a. values it will be swapped with value default string given """
    if cell == 'n.a.':
        return 'A default People Name'
    return cell

def convert_price_cell(cell):
    """ If there are any n.a. values it will be swapped with value 50 """
    if cell == 'n.a.':
        return 50
    return cell

df = pd.read_excel("stock_data.xlsx", "Sheet1", converters={
                    'people': convert_people_cell,
                    'price': convert_price_cell
                    })
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,A default People Name
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,50,ratan tata


In [24]:
# Writing two dataframes to two seperate sheets in excel

df_stocks = pd.DataFrame({
    'tickers': ['Google', 'WMT', 'MSFT'],
    'price': [845, 65, 64],
    'pe': [30.37, 14.26, 30.97],
    'eps': [27.82, 4.61, 2.12]
})

df_weather =  pd.DataFrame({
    'day': ['1/1/2017','1/2/2017','1/3/2017'],
    'temperature': [32,35,28],
    'event': ['Rain', 'Sunny', 'Snow']
})

In [25]:
with pd.ExcelWriter("Combined_Data.xlsx") as writer:
    df_stocks.to_excel(writer, sheet_name="stocks")
    df_weather.to_excel(writer, sheet_name="weather")