# READING CSV FILES

There are multiple ways to read a CSV file in Python, and the right call depends on the arguments you pass. One popular library for reading CSV files is pandas. Here are a few examples of how to read a CSV file using `pandas.read_csv`:

In [4]:
import pandas as pd
# Load the sample CSV with read_csv's default settings; the column names
# shown in the output come from the first line of the file.
csv_path = '/content/sample_data/california_housing_test.csv'
data = pd.read_csv(csv_path)

data

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.30,34.26,43.0,1510.0,310.0,809.0,277.0,3.5990,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0
...,...,...,...,...,...,...,...,...,...
2995,-119.86,34.42,23.0,1450.0,642.0,1258.0,607.0,1.1790,225000.0
2996,-118.14,34.06,27.0,5257.0,1082.0,3496.0,1036.0,3.3906,237200.0
2997,-119.70,36.30,10.0,956.0,201.0,693.0,220.0,2.2895,62000.0
2998,-117.12,34.10,40.0,96.0,14.0,46.0,14.0,3.2708,162500.0


In [5]:
# Print a few rows: iloc slices by position and excludes the end,
# so 2:5 selects the rows at positions 2, 3 and 4.
preview = data.iloc[2:5]
print(preview)

   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
2    -117.81     33.78                27.0       3589.0           507.0   
3    -118.36     33.82                28.0         67.0            15.0   
4    -119.67     36.33                19.0       1241.0           244.0   

   population  households  median_income  median_house_value  
2      1484.0       495.0         5.7934            270500.0  
3        49.0        11.0         6.1359            330000.0  
4       850.0       237.0         2.9375             81700.0  


In [19]:
# A different delimiter is selected with `sep`; '\t' is what a
# tab-separated (.tsv) file needs.
# NOTE: the file read here is comma-separated, so splitting on tabs finds no
# separators and every line ends up in a single column (see the output below).
data = pd.read_csv(
    '/content/sample_data/california_housing_test.csv',
    sep='\t',
)
data

Unnamed: 0,"longitude,""latitude"",""housing_median_age"",""total_rooms"",""total_bedrooms"",""population"",""households"",""median_income"",""median_house_value"""
0,"-122.050000,37.370000,27.000000,3885.000000,66..."
1,"-118.300000,34.260000,43.000000,1510.000000,31..."
2,"-117.810000,33.780000,27.000000,3589.000000,50..."
3,"-118.360000,33.820000,28.000000,67.000000,15.0..."
4,"-119.670000,36.330000,19.000000,1241.000000,24..."
...,...
2995,"-119.860000,34.420000,23.000000,1450.000000,64..."
2996,"-118.140000,34.060000,27.000000,5257.000000,10..."
2997,"-119.700000,36.300000,10.000000,956.000000,201..."
2998,"-117.120000,34.100000,40.000000,96.000000,14.0..."


In [22]:
# Different delimiters - whitespace-separated file:
# sep=r'\s+' splits on any run of spaces/tabs. It is the documented
# replacement for delim_whitespace=True, which is deprecated since pandas 2.2.
# NOTE: the file read here is comma-separated and contains no whitespace inside
# its lines, so every line ends up in a single column (see the output below).
data = pd.read_csv(
    '/content/sample_data/california_housing_test.csv',
    sep=r'\s+',
)
data

Unnamed: 0,"longitude,""latitude"",""housing_median_age"",""total_rooms"",""total_bedrooms"",""population"",""households"",""median_income"",""median_house_value"""
0,"-122.050000,37.370000,27.000000,3885.000000,66..."
1,"-118.300000,34.260000,43.000000,1510.000000,31..."
2,"-117.810000,33.780000,27.000000,3589.000000,50..."
3,"-118.360000,33.820000,28.000000,67.000000,15.0..."
4,"-119.670000,36.330000,19.000000,1241.000000,24..."
...,...
2995,"-119.860000,34.420000,23.000000,1450.000000,64..."
2996,"-118.140000,34.060000,27.000000,5257.000000,10..."
2997,"-119.700000,36.300000,10.000000,956.000000,201..."
2998,"-117.120000,34.100000,40.000000,96.000000,14.0..."


In [7]:
# header=None: do not take the column names from the first row.
# The columns are numbered 0..8 instead, and the file's header line is kept
# as the first data row (row 0 in the output below).
data = pd.read_csv(
    '/content/sample_data/california_housing_test.csv',
    header=None,
)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
1,-122.050000,37.370000,27.000000,3885.000000,661.000000,1537.000000,606.000000,6.608500,344700.000000
2,-118.300000,34.260000,43.000000,1510.000000,310.000000,809.000000,277.000000,3.599000,176500.000000
3,-117.810000,33.780000,27.000000,3589.000000,507.000000,1484.000000,495.000000,5.793400,270500.000000
4,-118.360000,33.820000,28.000000,67.000000,15.000000,49.000000,11.000000,6.135900,330000.000000
...,...,...,...,...,...,...,...,...,...
2996,-119.860000,34.420000,23.000000,1450.000000,642.000000,1258.000000,607.000000,1.179000,225000.000000
2997,-118.140000,34.060000,27.000000,5257.000000,1082.000000,3496.000000,1036.000000,3.390600,237200.000000
2998,-119.700000,36.300000,10.000000,956.000000,201.000000,693.000000,220.000000,2.289500,62000.000000
2999,-117.120000,34.100000,40.000000,96.000000,14.000000,46.000000,14.000000,3.270800,162500.000000


In [11]:
# names=[...]: supply the column names explicitly.
# NOTE: only two names are given for a nine-column file, so in the output
# below they label just the last two columns, and the file's own header line
# is read as a data row. To rename every column, list all nine names and
# also pass header=0 so the existing header line is replaced (see the
# pandas read_csv documentation for `names` / `header`).
column_names = ['population', 'latitude']
data = pd.read_csv(
    '/content/sample_data/california_housing_test.csv',
    names=column_names,
)
data

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,population,latitude
longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
-122.050000,37.370000,27.000000,3885.000000,661.000000,1537.000000,606.000000,6.608500,344700.000000
-118.300000,34.260000,43.000000,1510.000000,310.000000,809.000000,277.000000,3.599000,176500.000000
-117.810000,33.780000,27.000000,3589.000000,507.000000,1484.000000,495.000000,5.793400,270500.000000
-118.360000,33.820000,28.000000,67.000000,15.000000,49.000000,11.000000,6.135900,330000.000000
...,...,...,...,...,...,...,...,...
-119.860000,34.420000,23.000000,1450.000000,642.000000,1258.000000,607.000000,1.179000,225000.000000
-118.140000,34.060000,27.000000,5257.000000,1082.000000,3496.000000,1036.000000,3.390600,237200.000000
-119.700000,36.300000,10.000000,956.000000,201.000000,693.000000,220.000000,2.289500,62000.000000
-117.120000,34.100000,40.000000,96.000000,14.000000,46.000000,14.000000,3.270800,162500.000000


In [15]:
# na_values: extra markers that read_csv should treat as missing (NaN).
# Here the string 'population' and the number 991 are declared as missing;
# none of the rows displayed in the output below are affected.
missing_markers = ['population', 991]
data = pd.read_csv(
    '/content/sample_data/california_housing_test.csv',
    na_values=missing_markers,
)
data

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.30,34.26,43.0,1510.0,310.0,809.0,277.0,3.5990,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0
...,...,...,...,...,...,...,...,...,...
2995,-119.86,34.42,23.0,1450.0,642.0,1258.0,607.0,1.1790,225000.0
2996,-118.14,34.06,27.0,5257.0,1082.0,3496.0,1036.0,3.3906,237200.0
2997,-119.70,36.30,10.0,956.0,201.0,693.0,220.0,2.2895,62000.0
2998,-117.12,34.10,40.0,96.0,14.0,46.0,14.0,3.2708,162500.0
