In [None]:
# Open a file with the built-in open(): open(filename, mode)
# filename: absolute or relative path to the file
# mode: 'r' (read), 'w' (write, truncates an existing file), 'a' (append), 'r+' (read and write)

In [4]:
# Open the file for reading; a relative path is resolved against the
# current working directory.
f = open('Dataset/agencies.txt', 'r') # relative path
# f = open('/Volumes/GoogleDrive/.../Dataset/agencies.txt', 'r') # absolute path
# Always release the handle once it is no longer needed; the cells that
# read this file open it again themselves.
f.close()

In [None]:
# Read all content of the file into one string.
# The `with` block closes the file automatically, even if reading fails.
with open('Dataset/agencies.txt', 'r') as f: # relative path
    content = f.read()

print(content)
print(type(content))

In [1]:
# Copy every line that mentions "authority" (case-insensitive) into a new
# file, printing the zero-based line number and text of each match.
# Both files are managed by `with`, so they are closed even if an error occurs.
with open('Dataset/agencies.txt', 'r') as f, \
        open('Dataset/authorities.txt', 'w') as fw: # open to write
    # Read all lines of the file at once (costs memory for large files)
    lines = f.readlines()

    for i, line in enumerate(lines):
        if 'authority' in line.lower():
            # Strip only the trailing newline: slicing with [:-1] would clip a
            # real character if the last line has no '\n' at the end.
            print(i, line.rstrip('\n'))
            fw.write(line) # write the matching line unchanged

2 The Triboro Bridge and Tunnel Authority
4 The Metropolitan Transit Authority
5 The Port Authority of New York and New Jersey


In [None]:
# Read one line at a time (slower but more memory efficient).
# The `with` block closes the file when the loop is done or if it fails.
with open('Dataset/agencies.txt', 'r') as f: # relative path
    line = f.readline()
    while line: # readline() returns '' only at end of file
        # Strip only the trailing newline so a final line without '\n'
        # is not clipped by one character.
        print(line.rstrip('\n'))
        line = f.readline()

In [13]:
# Parse falling.txt by hand into a list of [data_point, time, height, uncertainty]
# rows of floats.
falling_data = []

with open('Dataset/falling.txt', 'r') as f:
    # Skip the first 5 lines
    for _ in range(5):
        f.readline()

    line = f.readline()
    while line:
        fields = line.split()
        # A blank line (e.g. a trailing empty line) splits into an empty list;
        # skip it instead of failing on the unpack below.
        if fields:
            # Unpacking enforces exactly four columns per data row.
            data_point, time, height, uncertainty = fields
            falling_data.append([float(data_point), float(time), float(height), float(uncertainty)])
        line = f.readline()

print(falling_data)

[[0.0, 0.0, 180.0, 3.5], [1.0, 0.5, 185.0, 4.5], [2.0, 0.7, 192.0, 5.2]]


In [15]:
import numpy as np

# Let NumPy parse the table into a float array, ignoring the first
# five lines of the file.
falling_data = np.loadtxt(fname='Dataset/falling.txt', skiprows=5)
print(falling_data)

[[  0.    0.  180.    3.5]
 [  1.    0.5 185.    4.5]
 [  2.    0.7 192.    5.2]]


In [16]:
# unpack=True transposes the table so each column comes back as its own array.
columns = np.loadtxt('Dataset/falling.txt', skiprows=5, unpack=True)
data_points, times, heights, uncertainties = columns

# Print the time and height of every row side by side.
for t, h in zip(times, heights):
    print(t, h)

0.0 180.0
0.5 185.0
0.7 192.0


In [17]:
# Load only columns 1 and 3 (zero-based), one array per column.
selected = np.loadtxt('Dataset/falling.txt', skiprows=5, usecols=(1, 3), unpack=True)
times, uncertainties = selected

for column in (times, uncertainties):
    print(column)

[0.  0.5 0.7]
[3.5 4.5 5.2]


In [None]:
import numpy as np

# Load columns 2, 3 and 4 (code, name, grade) as strings, skipping the header row.
codes, names, grades = np.loadtxt('Dataset/gradebook.csv', skiprows=1, usecols=(2, 3, 4),
                    unpack=True, delimiter=',', dtype=str)
# empty (or whitespace-only) grade => 0.0
# Stripping first means a cell holding only spaces is also treated as missing
# instead of crashing the float conversion below.
grades[np.char.strip(grades) == ''] = '0.0'
# empty (or whitespace-only) code => GCHXXXXX
codes[np.char.strip(codes) == ''] = 'GCHXXXXX'
# Convert the grade strings to floats in one vectorized step
grades = grades.astype(float)
# grade > 10 => 10.0
grades[grades > 10.0] = 10.0

for code, name, grade in zip(codes, names, grades):
    print(f'{code:10}{name:20}{grade:5.2f}')

grades_data = list(zip(codes, names, grades)) # zip 3 columns into a list of (code, name, grade) rows
header_str = 'Code,Name,Grade'  # header string for csv file

# comments='' stops savetxt from prefixing the header line with '# '
np.savetxt('Dataset/gradebook_clean.csv', grades_data, fmt='%s', delimiter=',',
           header=header_str, comments='')

In [13]:
import csv

# Clean the gradebook with the csv module and write a three-column copy.
# `with` closes both files even if a row fails to parse; newline='' lets the
# csv module handle line endings itself.
with open('Dataset/gradebook.csv', 'r', encoding='utf-8', newline='') as f, \
        open('Dataset/gradebook_short.csv', 'w', encoding='utf-8', newline='') as fw:
    reader = csv.reader(f)
    writer = csv.writer(fw)
    writer.writerow(['Code', 'Name', 'Grade'])

    # Load every row (header included) into memory
    grades_data = list(reader)

    for row in grades_data[1:]: # skip the header row
        # empty grade => 0.0
        if row[4] == '':
            row[4] = '0.0'
        # empty code => GCHXXXXX
        if row[2] == '':
            row[2] = 'GCHXXXXX'
        # grade > 10 => 10.0
        if float(row[4]) > 10.0:
            row[4] = '10.0'

        print(f'{row[2]:5}{row[3]:20}{row[4]:5}')
        writer.writerow([row[2], row[3], row[4]])

GBH17538Le Minh Huong       6.5  
GCH16025Bui Phuong Nam      0    
GCH16336Nguyen The Dat      6.5  
GCH16388Pham Minh Thang     9.5  
GCH16573Do Duy Tung         6.5  
GCH16602Nguyen Hoang Minh   0.0  
GCH16603Nguyen Thanh Thang  8.0  
GCH16604Tran Duc Le         3.8  
GCHXXXXXNguyen Minh Tuan    10.0 


In [None]:
# Read authors.txt, filter the authors who use Gmail and print their names & emails
# Example output (25 characters for name, 25 characters for email):
# Name            Email
# Paul Bakaus     paul.bakaus@gmail.com
# Richard Worth   rdworth@gmail.com
# Yehuda Katz     wycats@gmail.com
with open('Dataset/authors.txt', 'r') as f:
    lines = f.readlines()

print(f'{"Name":25}{"Email":25}')
for line in lines[3:]: # skip the first three lines (presumably a file header - verify against the data)
    # Expected line shape: "Name <email>". partition() never raises, unlike
    # unpacking split('<'), so lines without an address can simply be skipped.
    name, bracket, rest = line.partition('<')
    if not bracket:
        continue
    # Keep the text up to the closing '>' rather than assuming the line ends
    # with exactly ">\n" (the last line may lack a trailing newline).
    email = rest.partition('>')[0].strip()
    if 'gmail' in email.lower():
        print(f'{name.strip():25}{email:25}')

In [None]:
# Write a cleaned copy of pizza_data.csv: drop two columns and skip rows
# with missing values in columns 9, 10, 18 or 19.
# The input is opened in a `with` block (with newline='', as the csv module
# recommends) so it is closed when processing finishes or fails.
with open('Dataset/pizza_data.csv', 'r', encoding='utf-8', newline='') as fr:
    reader = csv.reader(fr)
    header = next(reader)
    del header[0]   # remove the first column (id) of header
    del header[7]   # remove the url column of header (index 8 before the delete above)

    with open('Dataset/pizza_data_clean.csv', 'w', encoding='utf-8', newline='') as fw:
        writer = csv.writer(fw)
        writer.writerow(header)
        for row in reader:
            # Skip the row if any of columns 9, 10, 18 or 19 is empty
            if not (row[9] and row[10] and row[18] and row[19]):
                continue
            row[2] = row[2].split(',')[0]   # keep only the text before the first comma
            # Drop the same two columns that were removed from the header
            del row[0]
            del row[7]
            writer.writerow(row)
        
    