In [9]:
import pandas as pd
import numpy as np

df = pd.read_csv("myCSV/ex1.csv") 
df

#if file does not use header row, you 
pd.read_csv("myCSV/ex2.csv", header=None)

#you can explicitly specify col names
pd.read_csv("myCSV/ex2.csv", names=["a", "b", "c", "d", "message"])

#if you need message column to be the index
names = ["a", "b", "c", "d", "message"]
res = pd.read_csv("myCSV/ex2.csv", names=names, index_col="message")
res

Unnamed: 0_level_0,a,b,c,d
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hello,1,2,3,4
world,5,6,7,8
foo,9,10,11,12


In [11]:
#hierarchial index from multiple columns
parsed = pd.read_csv("myCSV/csv_mindex.csv", index_col=["key1", "key2"])
parsed

Unnamed: 0_level_0,Unnamed: 1_level_0,value1,value2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16


In [20]:
#if your csv does not have fixed delimiter, it can have white space or other pattern
#\s+ mean regular expression 
#because there is one fewer column name header than the columns in other rows, pandas decides the firts column to be DF index
result = pd.read_csv("myCSV/ex3.txt", sep="\s+")
result

#if you want to skip specific rows in the document
pd.read_csv("myCSV/ex4.csv", skiprows=[0, 2, 3])

#if there is missing values in your document, pandas uses NA or NULL as default value to fill for those missing data
result = pd.read_csv("myCSV/ex5.csv")
result
#looking for null values in dataframe
pd.isna(result)

#you can disable NAN or NULL value in DF
result2 = pd.read_csv("myCSV/ex5.csv", keep_default_na=False)
result2

#or specify which you want to show as NULL or NaN
result3 = pd.read_csv("myCSV/ex5.csv", keep_default_na=False, na_values=["NA"])
result3

#you can specify by sentinels in which columns which data will be replaced by NaN
sentinels = {"message": ["foo", "NA"], "something": ["two"]}
pd.read_csv("myCSV/ex5.csv", na_values=sentinels, keep_default_na=False)

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,,5,6,,8,world
2,three,9,10,11.0,12,


In [13]:
import pandas as pd
import numpy as np

pd.options.display.max_rows = 10
result = pd.read_csv("myCSV/ex6.csv")
result

#if you want to show only n number of rows
pd.read_csv("myCSV/ex6.csv", nrows=5)

#if you need to read a file in a pieces, then:
chunker = pd.read_csv("myCSV/ex6.csv", chunksize=1000)
type(chunker)

tot = pd.Series([], dtype="int64")
for p in chunker:
    tot = tot.add(p["key"].value_counts(), fill_value=0)
    
tot = tot.sort_values(ascending=False)
tot[:10]

for i in chunker:
    print(i.shape)

#This gets the first 100 rows, running through a loop gets the next 100 rows and so on.
reader = pd.read_csv("myCSV/ex6.csv", iterator=True)
reader.get_chunk(100)



Unnamed: 0,one,two,three,four,key
0,0.467976,-0.038649,-0.295344,-1.824726,L
1,-0.358893,1.404453,0.704965,-0.200638,B
2,-0.501840,0.659254,-0.421691,-0.057688,G
3,0.204886,1.074134,1.388361,-0.982404,R
4,0.354628,-0.133116,0.283763,-0.837063,Q
...,...,...,...,...,...
95,1.106521,0.098153,0.789793,1.192693,T
96,-0.540543,1.782569,0.051931,0.463868,Q
97,-0.101980,0.981720,1.106990,-1.752269,M
98,0.632107,-0.761419,1.427930,-0.046928,F


In [18]:
#write data in text format
data = pd.read_csv("myCSV/ex5.csv")

#now write in another file
data.to_csv("myCSV/outputFile.csv")

#you can specify different delimiter to the output
import sys
data.to_csv(sys.stdout, sep="-")

#if you want to denote the NULL values instead of empty fields
data.to_csv(sys.stdout, sep="-", na_rep="NULL")

#if you want to disable both header columns and indexes
data.to_csv(sys.stdout, sep="-", na_rep="NULL", index=False, header=False)

#if you want to specify only subset of columns to be shown
data.to_csv(sys.stdout, sep="-", na_rep="NULL", index=False, header=False, columns=["a","b","c"])

-something-a-b-c-d-message
0-one-1-2-3.0-4-
1-two-5-6--8-world
2-three-9-10-11.0-12-foo
-something-a-b-c-d-message
0-one-1-2-3.0-4-NULL
1-two-5-6-NULL-8-world
2-three-9-10-11.0-12-foo
one-1-2-3.0-4-NULL
two-5-6-NULL-8-world
three-9-10-11.0-12-foo
1-2-3.0
5-6-NULL
9-10-11.0


In [30]:
#other delimited formats other than pandas
import csv
f = open("myCSV/ex7.csv")
reader = csv.reader(f)

for lin in reader:
    print(lin)
    
f.close()
#######################################################
#######################################################
#######################################################
#you can collect pieces of rows as you wants
with open("myCSV/ex7.csv") as f:
    lines = list(csv.reader(f))
    
header, data = lines[0], lines[1:]

#now try to create dictionary by your own
data_dict = {head: dt for head, dt in zip(header, zip(*data))}
data_dict

#######################################################
#######################################################
#######################################################
#different dialect
f = open("myCSV/ex7.csv")

class my_dialect(csv.Dialect):
    lineterminator = "\n"
    delimiter = ";"
    quotechar = '"'
    quoting = csv.QUOTE_MINIMAL
    
reader = csv.reader(f, dialect=my_dialect)
for l in reader:
    print(l)

#######################################################
#######################################################
#######################################################
#cleanup

with open("myCSV/mydata.csv", "w") as f:
    writer = csv.writer(f, dialect=my_dialect)
    writer.writerow(("one", "two", "three"))
    writer.writerow(("1", "2", "3"))
    writer.writerow(("4", "5", "6"))
    writer.writerow(("7", "8", "9"))
    
f1 = open("myCSV/mydata.csv")
reader = csv.reader(f1)
for lin in reader:
    print(lin)

['a', 'b', 'c']
['1', '2', '3']
['1', '2', '3']
['a,"b","c"']
['1,"2","3"']
['1,"2","3"']
['one;two;three']
['1;2;3']
['4;5;6']
['7;8;9']
