[IO tools](https://pandas.pydata.org/docs/user_guide/io.html#io-tools-text-csv-hdf5)

In [3]:
import pandas as pd
import numpy as np
from io import StringIO

In [2]:
# CSV & text files
# Wrap the in-memory CSV text in a file-like buffer so read_csv can parse it.
data = "col1,col2,col3\na,b,1\na,b,2\nc,d,3"
csv_buffer = StringIO(data)
pd.read_csv(csv_buffer)

Unnamed: 0,col1,col2,col3
0,a,b,1
1,a,b,2
2,c,d,3


In [3]:
# usecols: a callable is applied to each column name; only the columns
# whose upper-cased name is in the wanted list are kept.
wanted_upper = ["COL1", "COL3"]
pd.read_csv(StringIO(data), usecols=lambda name: name.upper() in wanted_upper)

Unnamed: 0,col1,col3
0,a,1
1,a,2
2,c,3


In [4]:
# General parsing configuration
# skiprows: a callable is applied to each line index; lines for which it
# returns True are skipped. Odd indices are dropped here, so the header
# (index 0) and the line at index 2 remain.
pd.read_csv(StringIO(data), skiprows=lambda line_idx: line_idx % 2 == 1)

Unnamed: 0,col1,col2,col3
0,a,b,2


In [1]:
# NA and missing data handling

In [5]:
# The last line has only three fields, so column "d" has no value in that row.
# dtype=object keeps the parsed values as Python objects instead of
# inferring numeric dtypes.
data = "a,b,c,d\n1,2,3,4\n5,6,7,8\n9,10,11"
raw_buffer = StringIO(data)
df = pd.read_csv(raw_buffer, dtype=object)

In [6]:
# Display the frame that was read with dtype=object.
df

Unnamed: 0,a,b,c,d
0,1,2,3,4.0
1,5,6,7,8.0
2,9,10,11,


In [7]:
# Single explicit label-based lookup (row label 0, column 'a') instead of
# chained indexing. The frame was read with dtype=object, so the value is
# returned as a string.
df.loc[0, 'a']

'1'

In [8]:
# Per-column dtypes: "a" is not listed and is left to type inference, "b" is
# kept as object, "c" is read as float64, and "d" uses the nullable "Int64"
# extension dtype, which can hold the missing value in the last row.
column_dtypes = {'b': object, 'c': np.float64, 'd': 'Int64'}
df = pd.read_csv(StringIO(data), dtype=column_dtypes)
df

Unnamed: 0,a,b,c,d
0,1,2,3.0,4.0
1,5,6,7.0,8.0
2,9,10,11.0,


In [9]:
# Inspect the resulting dtype of every column after the per-column dtype mapping.
df.dtypes

a      int64
b     object
c    float64
d      Int64
dtype: object

In [10]:
# Single explicit label-based lookup (row label 0, column 'a') instead of
# chained indexing. Column 'a' was left to type inference here, so the value
# is an integer rather than a string.
df.loc[0, 'a']

1

In [13]:
# Mixed column: integers, a quoted string and a float.
# The str converter is applied to every value of col_1, so all entries are
# kept as strings.
data = 'col_1\n1\n2\n"A"\n4.22'
mixed_buffer = StringIO(data)
df = pd.read_csv(mixed_buffer, converters=dict(col_1=str))
df

Unnamed: 0,col_1
0,1
1,2
2,A
3,4.22


In [14]:
# Count how many values of col_1 have each Python type.
element_types = df['col_1'].apply(type)
element_types.value_counts()

<class 'str'>    4
Name: col_1, dtype: int64

In [15]:
# Show the Python type of each element of col_1, row by row.
df['col_1'].apply(type)

0    <class 'str'>
1    <class 'str'>
2    <class 'str'>
3    <class 'str'>
Name: col_1, dtype: object

In [16]:
# to_numeric: re-read the same text without converters, then convert col_1
# to numbers. errors='coerce' turns entries that cannot be parsed (the "A"
# row) into NaN instead of raising.
df2 = pd.read_csv(StringIO(data)).assign(
    col_1=lambda frame: pd.to_numeric(frame['col_1'], errors='coerce')
)

In [17]:
# Display the frame after col_1 was converted with pd.to_numeric(errors='coerce').
df2

Unnamed: 0,col_1
0,1.0
1,2.0
2,
3,4.22


In [18]:
# Count how many values of the converted col_1 have each Python type.
converted_types = df2['col_1'].apply(type)
converted_types.value_counts()

<class 'float'>    4
Name: col_1, dtype: int64