reference link : https://github.com/codebasics/py/tree/master/pandas

In [2]:
# Import pandas and print its installed version.
import pandas as pd
print(pd.__version__)

0.22.0


In [3]:
# Create a DataFrame from a CSV file and preview its first ten rows.
df0 = pd.read_csv(filepath_or_buffer='nyc_weather.csv')
df0.head(10)

Unnamed: 0,EST,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
0,1/1/2016,38,23,52,30.03,10,8.0,0,5,,281
1,1/2/2016,36,18,46,30.02,10,7.0,0,3,,275
2,1/3/2016,40,21,47,29.86,10,8.0,0,1,,277
3,1/4/2016,25,9,44,30.05,10,9.0,0,3,,345
4,1/5/2016,20,-3,41,30.57,10,5.0,0,0,,333
5,1/6/2016,33,4,35,30.5,10,4.0,0,0,,259
6,1/7/2016,39,11,33,30.28,10,2.0,0,3,,293
7,1/8/2016,39,29,64,30.2,10,4.0,0,8,,79
8,1/9/2016,44,38,77,30.16,9,8.0,T,8,Rain,76
9,1/10/2016,50,46,71,29.59,4,,1.8,7,Rain,109


In [5]:
# Create a DataFrame from one sheet of an Excel workbook and preview its first ten rows.
df1 = pd.read_excel('weather_data.xlsx', sheet_name='Sheet1')
df1.head(10)

Unnamed: 0,day,temperature,windspeed,event
0,2017-01-01,32,6,Rain
1,2017-01-02,35,7,Sunny
2,2017-01-03,28,2,Snow


In [8]:
# Create a DataFrame from a Python dictionary:
# each key becomes a column name and each list holds that column's values.
dict1 = dict(
    date=['2017/06/01', '2016/06/02'],
    temp=[32, 33],
    wind=[6, 14],
)
df2 = pd.DataFrame(data=dict1)
df2.head(10)

Unnamed: 0,date,temp,wind
0,2017/06/01,32,6
1,2016/06/02,33,14


In [15]:
# Create a DataFrame from a list of tuples:
# each tuple is one row, and the column names are supplied separately.
tuples = [('2017/06/01', 32, 7), ('2017/06/02', 34, 5)]
df3 = pd.DataFrame(data=tuples, columns=['date', 'temp', 'wind'])
df3.head(5)

Unnamed: 0,date,temp,wind
0,2017/06/01,32,7
1,2017/06/02,34,5


In [10]:
# Read the CSV; `header` gives the row number on which the column names are present.
#
# Other read_csv options worth knowing:
#   header=None, names=['col_name1', 'col_name2', ...]  -> supply column names when the file has none
#   nrows=<n>                                            -> read only a limited number of rows
#   na_values=['value1', 'value2']                       -> convert the listed values to NaN
#   na_values={'column_name': ['values to convert']}     -> convert custom values to NaN per column
df = pd.read_csv(filepath_or_buffer='dummy.csv', header=0)
df.head()


Unnamed: 0,ticker,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a,ratan tata


In [13]:
# Replace the missing/unknown placeholder values with NaN (configured per column)
# and limit the read to the first 3 rows.
df = pd.read_csv(
    'dummy.csv',
    header=0,
    nrows=3,
    na_values={
        'eps': 'not available',
        'revenue': -1,
        'price': 'n.a',
        'people': 'n.a',
    },
)
df

Unnamed: 0,ticker,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,
2,MSFT,-1.0,85,64,bill gates


In [14]:
# using converters to replace the missing values with a constant
def convert_people_cell(cell):
    """Converter that swaps the 'n.a' placeholder for a constant name.

    Returns 'sam walton' when ``cell`` equals 'n.a'; any other value is
    returned unchanged.
    """
    return 'sam walton' if cell == 'n.a' else cell

# Run every value of the 'people' column through convert_people_cell while parsing.
df = pd.read_csv(
    'dummy.csv',
    header=0,
    converters={'people': convert_people_cell},
)
df

Unnamed: 0,ticker,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,sam walton
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a,ratan tata


In [4]:
## Writing the DataFrame back to a CSV file.
#
# Useful to_csv options:
#   index=False                             -> leave out the index column
#   header=False                            -> leave out the header row
#   columns=['col_name1', 'col_name2', ...] -> write only the listed columns
df.to_csv(path_or_buf='filename.csv', index=False)

In [16]:
## Reading Excel files in detail

## To deal with 'n.a' entries in the people column, define a function that replaces them with a constant value
def convert_people_cell(cell):
    """Return 'sam walton' for the 'n.a' placeholder, otherwise the cell as-is.

    NOTE(review): this repeats the definition from the CSV converters cell
    earlier in the notebook; the later definition shadows the earlier one.
    """
    is_placeholder = cell == 'n.a'
    return 'sam walton' if is_placeholder else cell

def convert_eps_cell(cell):
    """Converter that maps the 'not available' placeholder to None.

    Any other value (including negative numbers such as -1) is returned
    unchanged.
    """
    return None if cell == 'not available' else cell

def check_revenue(cell):
    """Converter that treats non-positive or non-numeric revenue as missing.

    Parameters
    ----------
    cell : object
        Raw cell value handed to the converter.

    Returns
    -------
    object or None
        None when ``cell`` is less than or equal to zero, or when it cannot
        be compared with a number at all (for example a blank cell, a text
        placeholder such as 'n.a', or None); otherwise ``cell`` unchanged.
    """
    try:
        is_non_positive = cell <= 0
    except TypeError:
        # Text/None cells cannot be ordered against 0 in Python 3; treat them
        # as missing instead of aborting the whole file read.
        return None
    return None if is_non_positive else cell

def check_price(cell):
    """Converter that maps the 'n.a' placeholder to None.

    Any other value is returned unchanged.
    """
    is_placeholder = cell == 'n.a'
    return None if is_placeholder else cell
    
# Read 'Sheet1' of the workbook, passing each listed column through its converter.
df = pd.read_excel(
    'dummy.xlsx',
    sheet_name='Sheet1',
    converters={
        'people': convert_people_cell,
        'eps': convert_eps_cell,
        'revenue': check_revenue,
        'price': check_price,
    },
)
df

Unnamed: 0,ticker,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,sam walton
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


In [18]:
## Writing the DataFrame to an Excel file.
#
# Useful to_excel options:
#   startrow -> specify the starting row
#   startcol -> specify the starting column
df.to_excel(excel_writer='new.xlsx', sheet_name='stocks', index=False)
print('success')


success


In [21]:
# Two small DataFrames that will be written into a single Excel file.
df_stocks = pd.DataFrame(data=dict(
    tickers=['GOOGL', 'WMT', 'MSFT'],
    price=[845, 65, 64],
    pe=[30.37, 14.26, 30.97],
    eps=[27.82, 4.61, 2.12],
))

df_weather = pd.DataFrame(data=dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017'],
    temperature=[32, 35, 28],
    event=['Rain', 'Sunny', 'Snow'],
))

# Use ExcelWriter to put both DataFrames into one workbook, one sheet per DataFrame.
with pd.ExcelWriter('stocks_weather.xlsx') as writer:
    df_stocks.to_excel(excel_writer=writer, sheet_name='stocks', index=False)
    df_weather.to_excel(excel_writer=writer, sheet_name='weather', index=False)

print('successfull written')

successfull written


For more functions: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html