# CSV

In [1]:
import pandas as pd
import numpy as np
from io import StringIO , BytesIO

In [5]:
# Small in-memory CSV: one header row followed by three records
# (no newline after the last record).
data = '\n'.join([
    'col1,col2,col3',
    'x,y,1',
    'a,b,2',
    'c,d,3',
])

In [6]:
# The CSV payload is held as a plain Python str
type(data)

str

In [13]:
# print() renders the embedded \n escapes as real line breaks, one CSV row per line
print(data)

col1,col2,col3
x,y,1
a,b,2
c,d,3


In [7]:
# StringIO wraps the str in a file-like buffer so read_csv can parse it without a file on disk
pd.read_csv(StringIO(data))

Unnamed: 0,col1,col2,col3
0,x,y,1
1,a,b,2
2,c,d,3


In [10]:
## Read only specific columns
# usecols takes a list of header names to keep. It also accepts a callable that
# is evaluated against each column name, e.g.
#   usecols=lambda name: name.lower() in ['col1', 'col3']
# (the comparison must use the same letter case as the names in the list).
df = pd.read_csv(
    StringIO(data),
    usecols=['col1', 'col3'],
)

In [11]:
# Only the two requested columns (col1, col3) are present in the frame
df

Unnamed: 0,col1,col3
0,x,1
1,a,2
2,c,3


In [12]:
# Convert the DataFrame to a CSV file.
# index=False omits the auto-generated RangeIndex; with the default (index=True)
# it is written as an unnamed leading column and comes back as "Unnamed: 0"
# when the file is read again.
df.to_csv('Test_Csv_File.csv', index=False)

In [14]:
## Specifying data type of columns
# All-numeric sample: header a,b,c,d plus three rows, each row terminated by a newline.
data = ''.join(
    row + '\n'
    for row in (
        'a,b,c,d',
        '1,2,3,4',
        '5,6,7,8',
        '9,10,11,12',
    )
)

In [15]:
# Show the raw CSV text; the final row ends with a newline, hence the extra blank line
print(data)

a,b,c,d
1,2,3,4
5,6,7,8
9,10,11,12



In [16]:
## Read every column with dtype=object
# With object dtype the parsed values are kept as the raw CSV strings
# instead of being converted to numbers.
df = pd.read_csv(
    StringIO(data),
    dtype=object,
)

In [17]:
# The frame looks numeric when displayed, but each cell holds a string
df

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12


In [20]:
# Single-step label lookup (row label 1, column 'a') instead of the chained
# df['a'][1]; with dtype=object every value read from the CSV is a string.
df.loc[1, 'a']

'5'

In [22]:
df['a']  # the whole column is reported with dtype object

0    1
1    5
2    9
Name: a, dtype: object

In [21]:
# read_csv returns a pandas DataFrame
type(df)

pandas.core.frame.DataFrame

In [31]:
## Read every column as integer
# NOTE: the builtin `int` maps to a platform-dependent integer width
# (the recorded output below shows int32).
df = pd.read_csv(
    StringIO(data),
    dtype=int,
)

In [32]:
# All four columns are now parsed as integers
df

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12


In [33]:
# The column now reports an integer dtype instead of object
df['a']

0    1
1    5
2    9
Name: a, dtype: int32

In [34]:
# Single-step label lookup (row label 1, column 'a') instead of the chained
# df['a'][1]; after dtype=int the value is a number rather than a string.
df.loc[1, 'a']

5

In [35]:
## Read every column as float
# Integer-looking fields such as "1" are parsed as 1.0.
df = pd.read_csv(
    StringIO(data),
    dtype=float,
)

In [36]:
# Every value is now displayed with a decimal point
df

Unnamed: 0,a,b,c,d
0,1.0,2.0,3.0,4.0
1,5.0,6.0,7.0,8.0
2,9.0,10.0,11.0,12.0


In [37]:
# The column dtype is float64
df['a']

0    1.0
1    5.0
2    9.0
Name: a, dtype: float64

In [38]:
## Give different columns different dtypes
# A dict maps column name -> dtype. 'Int64' (capital I) is pandas' nullable
# integer dtype. Column 'd' is not listed, so its dtype is inferred.
df = pd.read_csv(
    StringIO(data),
    dtype={
        'a': int,
        'c': 'Int64',
        'b': float,
    },
)

In [39]:
# Column 'b' shows decimals (float); 'a', 'c' and 'd' show integers
df

Unnamed: 0,a,b,c,d
0,1,2.0,3,4
1,5,6.0,7,8
2,9,10.0,11,12


In [40]:
# 'a' was requested as int
df['a']

0    1
1    5
2    9
Name: a, dtype: int32

In [41]:
# 'c' was requested as the 'Int64' dtype
df['c']

0     3
1     7
2    11
Name: c, dtype: Int64

In [42]:
# 'b' was requested as float
df['b']

0     2.0
1     6.0
2    10.0
Name: b, dtype: float64

In [44]:
## Check the dtype of every column at once; 'd' had no explicit dtype, so it was inferred
df.dtypes

a      int32
b    float64
c      Int64
d      int64
dtype: object

In [45]:
## Index columns and trailing delimiters


In [46]:
# Sample with an explicit 'Index' header column and mixed text / numeric fields
# (no newline after the last record).
data = '\n'.join([
    'Index,a,b,c',
    '4,apple,bat,5.7',
    '8,orange,cow,10',
])

In [48]:
# Without index_col, 'Index' is read as an ordinary column and a default 0..n-1 index is added
pd.read_csv(StringIO(data))

Unnamed: 0,Index,a,b,c
0,4,apple,bat,5.7
1,8,orange,cow,10.0


In [49]:
## Use the first CSV column ('Index', position 0) as the row index
pd.read_csv(
    StringIO(data),
    index_col=0,
)

Unnamed: 0_level_0,a,b,c
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,apple,bat,5.7
8,orange,cow,10.0


In [50]:
# Any column position can serve as the index; position 2 is column 'b'
pd.read_csv(
    StringIO(data),
    index_col=2,
)

Unnamed: 0_level_0,Index,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bat,4,apple,5.7
cow,8,orange,10.0


In [53]:
# Each data row ends with a trailing comma, so it has one more field
# than the three-name header.
data = '\n'.join([
    'a,b,c',
    '4,apple,bat,',
    '8,orange,cow,',
])

In [54]:
# The rows have one more field than the header, so read_csv uses the first
# field (4, 8) as the index and column 'c' is left empty
pd.read_csv(StringIO(data))

Unnamed: 0,a,b,c
4,apple,bat,
8,orange,cow,


In [55]:
# index_col=False stops pandas from using the first column as the index,
# so the fields line up with the header and the trailing empty field is dropped
pd.read_csv(
    StringIO(data),
    index_col=False,
)

Unnamed: 0,a,b,c
0,4,apple,bat
1,8,orange,cow


In [56]:
## Combining usecols and index_col
# Same trailing-comma sample: three header names, four fields per data row.
data = ''.join([
    'a,b,c\n',
    '4,apple,bat,\n',
    '8,orange,cow,',
])

In [59]:
# Keep only columns 'b' and 'c', and do not promote the first field to the index
pd.read_csv(
    StringIO(data),
    usecols=['b', 'c'],
    index_col=False,
)

Unnamed: 0,b,c
0,apple,bat
1,orange,cow


In [64]:
## Quoting and escaping characters
# The first field is double-quoted and contains both commas and
# backslash-escaped double quotes (\").
data = (
    'a,b\n'
    '"hello, \\"Bob \\",nice to see you",5'
)

In [65]:
# Treat backslash as the escape character so that \" inside the quoted field
# is read as a literal double quote rather than the end of the field
pd.read_csv(
    StringIO(data),
    escapechar='\\',
)

Unnamed: 0,a,b
0,"hello, ""Bob "",nice to see you",5


In [66]:
## URL to CSV
# read_csv accepts a URL directly. This file is tab-separated, hence sep='\t'.
# Requires network access when the cell runs.
df = pd.read_csv(
    'https://download.bls.gov/pub/time.series/cu/cu.item',
    sep='\t',
)

In [67]:
# Preview the first five rows instead of displaying the whole downloaded table
df.head()

Unnamed: 0,item_code,item_name,display_level,selectable,sort_sequence
0,AA0,All items - old base,0,T,2
1,AA0R,Purchasing power of the consumer dollar - old ...,0,T,400
2,SA0,All items,0,T,1
3,SA0E,Energy,1,T,375
4,SA0L1,All items less food,1,T,359
