In [57]:
import pandas as pd

In [58]:
# Baseline load: every option is left at its default, so the file's first
# line supplies the column names.
df = pd.read_csv(
    "stock_data.csv",
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


### Skip rows from the top of the data while reading
    (skiprows = rowNumber)
    the original header also counts as a row
rowIndex = rowNumber - 1

The data row at rowIndex becomes the new Header of the data

In [59]:
# Drop the first two lines of the file (the original header counts as one)
# before parsing; the next line is then used as the header.
df = pd.read_csv(
    "stock_data.csv",
    skiprows=2,
)
df

Unnamed: 0,WMT,4.61,484,65,n.a.
0,MSFT,-1,85,64,bill gates
1,RIL,not available,50,1023,mukesh ambani
2,TATA,5.6,-1,n.a.,ratan tata


### Specify a row as the new Header of the data
    (header = rowNumber, counted from 0 with the original header as line 0)
rowIndex = rowNumber - 1

The data row at rowIndex becomes the new Header; every line above it is dropped

In [60]:
# Take the column names from the file line with 0-based index 2.
df = pd.read_csv(
    "stock_data.csv",
    header=2,
)
df

Unnamed: 0,WMT,4.61,484,65,n.a.
0,MSFT,-1,85,64,bill gates
1,RIL,not available,50,1023,mukesh ambani
2,TATA,5.6,-1,n.a.,ratan tata


### Create Custom Header for the data
    (header=None, names = ["headerName", "headerName", ....])

In [61]:
# Skip the file's own header line and supply the column names explicitly.
# NOTE(review): "tiker" looks like a typo for "tickers" — confirm before renaming.
df = pd.read_csv(
    "stock_data.csv",
    skiprows=1,
    header=None,
    names=["tiker", "eps", "revenue", "price", "people"],
)
df

Unnamed: 0,tiker,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


### Read Limited Data from CSV file
    (nrows = rowNumber)

rowIndex = rowNumber - 1

Reads data rows from the file up to and including rowIndex; the header line is not counted

In [62]:
# Read only the first three data rows (the header line is not counted).
df = pd.read_csv(
    "stock_data.csv",
    nrows=3,
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1.0,85,64,bill gates


### Clean up messy data from file such as "not available" and "n.a." with NaN using na_values

        (na_values = ["messyData_1", "messyData_2"])

NOTE: a list applies to every column: each listed messyData value is replaced with NaN wherever it appears

In [63]:
# Treat these placeholder strings as missing values in every column.
df = pd.read_csv(
    "stock_data.csv",
    na_values=["not available", "n.a."],
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


    (na_values = {
        "columnName_1": ["messyData_1", "messyData_2"],
        "columnName_2": ["messyData_1", "messyData_2", "messyData_3"],
        })

NOTE: cleans up only the specified columns: in each listed columnName, the given messyData values are replaced with NaN

In [64]:
# Per-column missing-value markers: -1 is treated as missing only in
# "revenue" and "price", so an eps of -1 is kept as a real value.
df = pd.read_csv(
    "stock_data.csv",
    na_values=dict(
        eps=["not available", "n.a."],
        revenue=["not available", "n.a.", -1],
        people=["not available", "n.a."],
        price=["not available", "n.a.", -1],
    ),
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


### Create New CSV File
    .to_csv("fileSource", index = True, header = True, columns = ["columnName_1", "columnName_2"])

By default, both the index numbers and the header are written when we create a new CSV file

If the file already exists, it is overwritten

No Index

    .to_csv("fileSource", index=False)

In [65]:
# Write the frame without the index column; the header row is still written.
df.to_csv(
    "new.csv",
    index=False,
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


### No Index and No Header

    .to_csv("fileSource", index=False, header=False)

In [66]:
# Write only the data values: neither the index column nor the header row.
df.to_csv(
    "new.csv",
    index=False,
    header=False,
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


### Custom Columns (write only the selected columns)

    .to_csv("fileSource", columns=["columnName_1", "columnName_2"])

In [67]:
# Write only the "tickers" and "eps" columns, without the index column.
df.to_csv(
    "new.csv",
    index=False,
    columns=["tickers", "eps"],
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


### Converters argument in read_excel() method
            .read_excel("fileSource", sheet_name = "sheetName", converters = {
                "columnName1": function1,
                "columnName2": function2,
            })
Converts messy or unwanted data to the required custom values

In [68]:
def conver_people_cell(cell):
    """Converter for the "people" column.

    Returns "sam walton" when the cell holds the placeholder "n.a.";
    any other value is returned unchanged.
    """
    return "sam walton" if cell == "n.a." else cell

def conver_eps_cell(cell):
    """Converter for the "eps" column.

    Returns None when the cell holds the placeholder "not available";
    any other value is returned unchanged.
    """
    return None if cell == "not available" else cell

# Read "Sheet1" and pass every cell of the "people" and "eps" columns
# through its converter function while loading.
df = pd.read_excel(
    "stock_data.xlsx",
    sheet_name="Sheet1",
    converters=dict(
        people=conver_people_cell,
        eps=conver_eps_cell,
    ),
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,sam walton
2,MSFT,-1.0,85,64,bill gates
3,RIL,,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


### Start row and start column
        .to_excel("fileSource", sheet_name="sheetName", startrow = rowNumber, startcol = columnNumber)

In [None]:
# Write the frame to sheet "stocks" without the index column, offsetting
# the table with startrow=1 and startcol=2.
df.to_excel(
    "new.xlsx",
    sheet_name="stocks",
    index=False,
    startrow=1,
    startcol=2,
)
    

### use ExcelWriter() class
        with pd.ExcelWriter("fileSource") as writerName:
            df1.to_excel(writerName, sheet_name="Sheet1")
            df2.to_excel(writerName, sheet_name="Sheet2")
Writes several DataFrames to the same file, each in its own sheet

In [75]:
# Small in-memory stocks table: one row per ticker.
df_stocks = pd.DataFrame(
    data=dict(
        tickers=["GOOGL", "WMT", "MSFT"],
        price=[845, 65, 64],
        pe=[30.37, 14.26, 30.97],
        eps=[27.82, 4.61, 2.12],
    )
)

# Small in-memory weather table: one row per day.
df_weather = pd.DataFrame(
    data=dict(
        day=["1/1/2017", "1/2/2017", "1/3/2017"],
        temperature=[32, 35, 28],
        event=["Rain", "Sunny", "Snow"],
    )
)


# Write each frame to its own sheet of a single workbook; the writer is
# closed when the with-block exits.
with pd.ExcelWriter("stocks_weather.xlsx") as writer:
    for sheet_label, frame in (("stocks", df_stocks), ("weather", df_weather)):
        frame.to_excel(writer, sheet_name=sheet_label)