# CSV Module in Python:

https://docs.python.org/3/library/csv.html

In [None]:
import csv

# Preview only the first few rows: printing every field of every row of the
# full dataset floods the notebook output.
PREVIEW_ROWS = 5

# newline='' hands newline handling to the csv module, as the csv documentation
# recommends for any file object passed to csv.reader.
with open("/content/sample_data/california_housing_test.csv", newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # csv.reader does not treat the header specially, so the first row yielded
    # is the column-name line of the file.
    for row_number, row in enumerate(csv_reader):
        if row_number >= PREVIEW_ROWS:
            break
        print(row)        # the whole record as a list of strings
        for field in row:
            print(field)  # then each field on its own line


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
96900.000000
['-118.140000', '33.890000', '33.000000', '2867.000000', '786.000000', '1774.000000', '705.000000', '2.929200', '183400.000000']
-118.140000
33.890000
33.000000
2867.000000
786.000000
1774.000000
705.000000
2.929200
183400.000000
['-121.890000', '37.420000', '26.000000', '40.000000', '8.000000', '52.000000', '7.000000', '7.719700', '225000.000000']
-121.890000
37.420000
26.000000
40.000000
8.000000
52.000000
7.000000
7.719700
225000.000000
['-122.410000', '37.760000', '52.000000', '492.000000', '139.000000', '316.000000', '168.000000', '3.086500', '225000.000000']
-122.410000
37.760000
52.000000
492.000000
139.000000
316.000000
168.000000
3.086500
225000.000000
['-118.600000', '34.160000', '37.000000', '3441.000000', '584.000000', '1283.000000', '544.000000', '4.165600', '313100.000000']
-118.600000
34.160000
37.000000
3441.000000
584.000000
1283.000000
544.000000
4.165600
313100.000000
['-118.410000', '34.02

KeyboardInterrupt: ignored

# Reading CSV Files Into a Dictionary With csv

In [None]:
import csv

# Stop once this many lines have been counted (the header line is counted too).
MAX_LINES = 10

# newline='' hands newline handling to the csv module, as the csv documentation
# recommends for any file object passed to a reader.
with open('/content/sample_data/california_housing_test.csv', mode='r', newline='') as csv_file:
    # DictReader yields one dict per data row, keyed by the column names.
    csv_reader = csv.DictReader(csv_file)
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            # Iterating a row dict yields its keys, i.e. the column names.
            print(f'Column names are: \n{", ".join(row)}')
            line_count += 1  # account for the header line
        print(f'{row["longitude"]}')
        line_count += 1
        if line_count == MAX_LINES:
            break
    print(f'Processed {line_count} lines.')

Column names are: 
longitude, latitude, housing_median_age, total_rooms, total_bedrooms, population, households, median_income, median_house_value
-122.050000, 37.370000, 27.000000, 3885.000000, 661.000000, 1537.000000, 606.000000, 6.608500, 344700.000000
-118.300000, 34.260000, 43.000000, 1510.000000, 310.000000, 809.000000, 277.000000, 3.599000, 176500.000000
-117.810000, 33.780000, 27.000000, 3589.000000, 507.000000, 1484.000000, 495.000000, 5.793400, 270500.000000
-118.360000, 33.820000, 28.000000, 67.000000, 15.000000, 49.000000, 11.000000, 6.135900, 330000.000000
-119.670000, 36.330000, 19.000000, 1241.000000, 244.000000, 850.000000, 237.000000, 2.937500, 81700.000000
-119.560000, 36.510000, 37.000000, 1018.000000, 213.000000, 663.000000, 204.000000, 1.663500, 67000.000000
-121.430000, 38.630000, 43.000000, 1009.000000, 225.000000, 604.000000, 218.000000, 1.664100, 67000.000000
-120.650000, 35.480000, 19.000000, 2310.000000, 471.000000, 1341.000000, 441.000000, 3.225000, 166900.0

# Optional Python CSV reader Parameters

ref: https://realpython.com/python-csv/

Suppose we have a CSV file containing the following:

//

Row1: name,address,date joined

Row2: john smith,1132 Anywhere Lane Hoboken NJ, 07030,Jan 4

Row3: erica meyers,1234 Smith Lane Hoboken NJ, 07030,March 2

//

This CSV file contains three fields: name, address, and date joined, which are delimited by commas. The problem is that the data for the address field also contains a comma to signify the zip code.

There are three different ways to handle this situation:

## Use a different delimiter
That way, the comma can safely be used in the data itself. You use the delimiter optional parameter to specify the new delimiter.

## Wrap the data in quotes
The special nature of your chosen delimiter is ignored in quoted strings. Therefore, you can specify the character used for quoting with the quotechar optional parameter. As long as that character also doesn’t appear in the data, you’re fine.

## Escape the delimiter characters in the data
Escape characters work just as they do in format strings, nullifying the interpretation of the character being escaped (in this case, the delimiter). If an escape character is used, it must be specified using the escapechar optional parameter.



# Writing CSV Files With the csv Module

We can also write to a CSV file using a writer object and its `writerow()` method:

In [None]:
import csv

# newline='' stops the file object from translating the line endings that
# csv.writer emits; without it, platforms that use \r\n get an extra \r per
# row, which shows up as blank lines between records.
with open('/content/emp_details.csv', mode='w', newline='') as employee_file:
    # QUOTE_MINIMAL quotes a field only when it contains a special character
    # such as the delimiter or the quote character.
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    employee_writer.writerow(['emp_name', 'dept', 'birth_month'])  # header row
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    # Prints only the writer object's repr; the rows themselves go to the file.
    print(employee_writer)

<_csv.writer object at 0x7fe24412ddb0>


# Writing a CSV file with each row as a dictionary

In [None]:
import csv

# newline='' stops the file object from translating the line endings that the
# csv writer emits; without it, platforms that use \r\n get an extra \r per
# row, which shows up as blank lines between records.
with open('/content/employee_file2.csv', mode='w', newline='') as csv_file:
    # fieldnames fixes the column order used for the header and for every row.
    fieldnames = ['emp_name', 'dept', 'birth_month']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()  # writes the fieldnames as the first line
    writer.writerow({'emp_name': 'John Smith', 'dept': 'Accounting', 'birth_month': 'November'})
    writer.writerow({'emp_name': 'Erica Meyers', 'dept': 'IT', 'birth_month': 'March'})

## For more hands-on practice, visit:
https://pynative.com/python/file-handling/

In [None]:
# https://pynative.com/python/file-handling/

In [None]:
#Copy Files and Directories in Python
# https://pynative.com/python-copy-files-and-directories/

# Pandas module for reading a CSV file

In [None]:
import pandas as pd

In [None]:
# Load the housing test CSV into a DataFrame.
# NOTE: the name `df` is rebound to an unrelated demo frame further down in this
# notebook, so re-run this cell before using the housing data again.
df = pd.read_csv("/content/sample_data/california_housing_test.csv")

In [None]:
# Display the first five rows (head() defaults to n=5) as the cell's output.
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


## For more practice on slicing a DataFrame, visit:
https://sparkbyexamples.com/pandas/how-to-slice-columns-in-pandas-dataframe/

In [None]:
# https://sparkbyexamples.com/pandas/how-to-slice-columns-in-pandas-dataframe/

## Using numpy and pandas together to generate a sample dataframe

In [None]:
import numpy as np
import pandas as pd

# Build a 5x4 demo frame holding the integers 0..19 in row-major order,
# with the columns labelled A through D.
df = pd.DataFrame(
    data=np.arange(0, 20).reshape((5, 4)),
    columns=list("ABCD"),
)
print(df)

    A   B   C   D
0   0   1   2   3
1   4   5   6   7
2   8   9  10  11
3  12  13  14  15
4  16  17  18  19


Select rows and columns using labels

In [None]:
# .loc label slices include both endpoints, so this selects rows 1, 2 and 3
# of column "A".
df.loc[1:3,"A"]
## To select column "A" for every row instead (not just rows 1-3), either of these works:

# df["A"]

# df.A

1     4
2     8
3    12
Name: A, dtype: int64

Select a row by its label.

In [None]:
# A single row label returns that row as a Series indexed by the column names.
df.loc[0]

A    0
B    1
C    2
D    3
Name: 0, dtype: int64

Select multiple rows by label.

In [None]:
# A list of row labels returns a DataFrame containing just those rows.
df.loc[[0,1]]

Unnamed: 0,A,B,C,D
0,0,1,2,3
1,4,5,6,7


Accessing values by row and column label.

In [None]:
# One row label plus one column label returns the single value in that cell.
df.loc[0,"D"]

3

Accessing values from multiple columns of same row.

In [None]:
# One row label plus a list of column labels returns a Series holding the
# values of those columns for that row.
df.loc[1,["A", "C"]]

A    4
C    6
Name: 1, dtype: int64

## For more practice on selection of values and indexes, you can visit:

https://www.opentechguides.com/how-to/article/pandas/193/index-slice-subset.html

In [None]:
# https://www.opentechguides.com/how-to/article/pandas/193/index-slice-subset.html