### Read, process and transform a CSV file using pandas

In [28]:
import pandas as pd
# Load the raw file with pandas' defaults: the first physical line (the
# title) is used as the header, so the real column names end up in the data.
df = pd.read_csv(filepath_or_buffer="./sample_data_files/company_data.csv")
df

Unnamed: 0,"company information data file, 2019",Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,,,,,
1,Company,Number of Employee,Revenue,Stock Price,eps
2,AAA,2006,4005,35,4
3,BPT,23450,400056,67,0.3
4,Colobibi,not available,379,342,-1
5,Epson,56,-1,12,13.2
6,Jairon,891,659,n.a.,N.A.
7,McToba,1029,2049,9,7.7


In [29]:
# Take the column names from row 2 (zero-based), i.e. skip the title line
# and the empty row that precede the real header.
df = pd.read_csv(
    filepath_or_buffer="./sample_data_files/company_data.csv",
    header=2,
)
df

Unnamed: 0,Company,Number of Employee,Revenue,Stock Price,eps
0,AAA,2006,4005,35,4
1,BPT,23450,400056,67,0.3
2,Colobibi,not available,379,342,-1
3,Epson,56,-1,12,13.2
4,Jairon,891,659,n.a.,N.A.
5,McToba,1029,2049,9,7.7


In [30]:
# Alternative to `header`: drop the first two lines of the file so that the
# next line is read as the header row.
df = pd.read_csv(
    filepath_or_buffer="./sample_data_files/company_data.csv",
    skiprows=2,
)
df

Unnamed: 0,Company,Number of Employee,Revenue,Stock Price,eps
0,AAA,2006,4005,35,4
1,BPT,23450,400056,67,0.3
2,Colobibi,not available,379,342,-1
3,Epson,56,-1,12,13.2
4,Jairon,891,659,n.a.,N.A.
5,McToba,1029,2049,9,7.7


### company_data2.csv has no header

In [31]:
# Default read of the header-less file: pandas treats the first record
# (AAA, 2006, ...) as the column names.
df = pd.read_csv(filepath_or_buffer="./sample_data_files/company_data2.csv")
df

Unnamed: 0,AAA,2006,4005,35,4
0,BPT,23450,400056,67,0.3
1,Colobibi,340,379,342,-1
2,Epson,56,-1,12,13.2
3,Jairon,891,659,N.A.,N.A.
4,McToba,1029,2049,9,7.7


In [32]:
# Tell pandas there is no header row; every line is data and the columns
# receive integer labels.
df = pd.read_csv(
    filepath_or_buffer="./sample_data_files/company_data2.csv",
    header=None,
)
df

Unnamed: 0,0,1,2,3,4
0,AAA,2006,4005,35,4
1,BPT,23450,400056,67,0.3
2,Colobibi,340,379,342,-1
3,Epson,56,-1,12,13.2
4,Jairon,891,659,N.A.,N.A.
5,McToba,1029,2049,9,7.7


In [35]:
# No header row in the file, so supply the column names explicitly.
df = pd.read_csv(
    filepath_or_buffer="./sample_data_files/company_data2.csv",
    header=None,
    names=[
        "Company",
        "Number of Employee",
        "Revenue",
        "Stock Price",
        "eps",
    ],
)
df

Unnamed: 0,Company,Number of Employee,Revenue,Stock Price,eps
0,AAA,2006,4005,35,4
1,BPT,23450,400056,67,0.3
2,Colobibi,340,379,342,-1
3,Epson,56,-1,12,13.2
4,Jairon,891,659,N.A.,N.A.
5,McToba,1029,2049,9,7.7


In [19]:
# Read only the first two data rows that follow the header row.
df = pd.read_csv(
    filepath_or_buffer="./sample_data_files/company_data.csv",
    header=2,
    nrows=2,
)
df

Unnamed: 0,Company,Number of Employee,Revenue,Stock Price,eps
0,AAA,2006,4005,35,4.0
1,BPT,23450,400056,67,0.3


In [36]:
# Reload the complete table, using row 2 (zero-based) as the header.
df = pd.read_csv(
    filepath_or_buffer="./sample_data_files/company_data.csv",
    header=2,
)
df

Unnamed: 0,Company,Number of Employee,Revenue,Stock Price,eps
0,AAA,2006,4005,35,4
1,BPT,23450,400056,67,0.3
2,Colobibi,not available,379,342,-1
3,Epson,56,-1,12,13.2
4,Jairon,891,659,n.a.,N.A.
5,McToba,1029,2049,9,7.7


### replace fields containing "not available", "n.a." and "N.A." with NaN

In [21]:
# Treat the listed placeholder strings as missing values (NaN) in every
# column while parsing.
df = pd.read_csv(
    filepath_or_buffer="./sample_data_files/company_data.csv",
    header=2,
    na_values=[
        "not available",
        "n.a.",
        "N.A.",
    ],
)
df

Unnamed: 0,Company,Number of Employee,Revenue,Stock Price,eps
0,AAA,2006.0,4005,35.0,4.0
1,BPT,23450.0,400056,67.0,0.3
2,Colobibi,,379,342.0,-1.0
3,Epson,56.0,-1,12.0,13.2
4,Jairon,891.0,659,,
5,McToba,1029.0,2049,9.0,7.7


### convert the meaningless negative value (negative revenue) into NaN, but keep the meaningful negative value (negative eps)

In [37]:
# Per-column missing-value markers: every numeric column maps the textual
# placeholders to NaN, and only "Revenue" additionally treats -1 as missing
# (a negative eps is a legitimate value and must be kept).
df = pd.read_csv(
    filepath_or_buffer="./sample_data_files/company_data.csv",
    header=2,
    na_values={
        column: ["not available", "n.a.", "N.A."] + ([-1] if column == "Revenue" else [])
        for column in ("Number of Employee", "Revenue", "Stock Price", "eps")
    },
)
df

Unnamed: 0,Company,Number of Employee,Revenue,Stock Price,eps
0,AAA,2006.0,4005.0,35.0,4.0
1,BPT,23450.0,400056.0,67.0,0.3
2,Colobibi,,379.0,342.0,-1.0
3,Epson,56.0,,12.0,13.2
4,Jairon,891.0,659.0,,
5,McToba,1029.0,2049.0,9.0,7.7


### write the transformed data to a new CSV file

In [24]:
# Write the frame with default options (the row index is written as the
# first column).
df.to_csv(path_or_buf="./sample_data_files/company_data_rev.csv")

In [25]:
# Write the frame without the row-index column.
df.to_csv(
    path_or_buf="./sample_data_files/company_data_rev.csv",
    index=False,
)

In [26]:
# Export only the "Company" and "Stock Price" columns, without the row index.
df.to_csv(
    path_or_buf="./sample_data_files/company_data_rev2.csv",
    index=False,
    columns=["Company", "Stock Price"],
)

In [27]:
# Export only the "Company" and "Stock Price" columns, with neither the row
# index nor the header line.
df.to_csv(
    path_or_buf="./sample_data_files/company_data_rev2.csv",
    index=False,
    columns=["Company", "Stock Price"],
    header=False,
)