# SEPTA Data Project
#### William McKee
#### December 2017

SEPTA is a public agency responsible for the public transportation system in Philadelphia and its Pennsylvania suburbs.  SEPTA stands for Southeastern Pennsylvania Transportation Authority. 

This code analyzes the data set for SEPTA Bus and Rail lines downloaded from https://transitfeeds.com.  I downloaded the SEPTA Bus zip file and renamed gfts.zip to septa_bus_gfts.zip.  I downloaded the SEPTA Rail zip file and renamed gfts.zip to septa_rail_gfts.zip.

## Data Set Conversion

The code below checks the contents of both zip files, displays some zip file contents, and converts the files to csv format.

In [3]:
import zipfile
import csv
import os

def read_and_print_first_lines_from_zipped_file(zipfilename, limit):
    """
    Reads zip file and prints the first limit lines from each file contained in the zip file
    zipfilename = zip file name (such as 'example.zip')
    limit = number of lines to print in file
    """
    print()
    print("CONTENTS OF ZIP FILE " + zipfilename + ":")
    print()
    with zipfile.ZipFile(zipfilename, 'r') as z:
        file_name_list = sorted(z.namelist())
        for file in file_name_list:
            print(file)
            with z.open(file, 'r') as input_file:
                for line_number, line in enumerate(input_file):
                    if line_number > limit:
                        break
                    print(line)
            print()
    print()

# Loop through zip files
NUM_LINES = 5
ZIP_FILE_NAMES = ['septa_bus_gfts.zip', 'septa_rail_gfts.zip']
for file in ZIP_FILE_NAMES:
    # Read the zip files and display some file contents
    read_and_print_first_lines_from_zipped_file(file, NUM_LINES)

    # Extract zip file contents
    directory_name = os.path.splitext(file)[0]
    with zipfile.ZipFile(file, 'r') as zip_ref:
        zip_ref.extractall(directory_name)

    # Convert txt files to csv files
    os.chdir(directory_name)
    for input_file in os.listdir('.'):
        with open(input_file, 'r') as in_file:
            stripped = (line.strip() for line in in_file)
            lines = (line.split(",") for line in stripped if line)
            output_file = os.path.splitext(input_file)[0] + ".csv"
            print("Convert " + input_file + " contents to " + output_file)
            with open(output_file, 'w', ) as out_file:
                writer = csv.writer(out_file, lineterminator = '\n')
                writer.writerows(lines)
            
    # Remove original text files
    for item in os.listdir('.'):
        if item.endswith(".txt"):
            os.remove(item)

    os.chdir('..')


CONTENTS OF ZIP FILE septa_bus_gfts.zip:

agency.txt
b'agency_name,agency_url,agency_timezone,agency_lang,agency_fare_url\r\n'
b'SEPTA,http://www.septa.org,America/New_York,EN,http://www.septa.org/fares/transit/index.html'

calendar.txt
b'service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\r\n'
b'10,1,1,1,1,1,0,0,20170903,20180224\r\n'
b'11,0,0,0,0,0,0,0,20170903,20180224\r\n'
b'12,0,0,0,0,0,1,0,20170903,20180224\r\n'
b'13,0,0,0,0,0,0,1,20170903,20180224\r\n'
b'16,1,1,1,1,1,0,0,20170903,20180224\r\n'

calendar_dates.txt
b'service_id,date,exception_type\r\n'
b'10,20170904,2\r\n'
b'13,20170904,1\r\n'
b'16,20170904,2\r\n'
b'19,20170904,1\r\n'
b'22,20170904,2\r\n'

fare_attributes.txt
b'fare_id,price,currency_type,payment_method,transfers,transfer_duration\r\n'
b'1,2.50,USD,0,0,0\r\n'
b'2,3.50,USD,0,1,3600\r\n'
b'3,4.50,USD,0,2,3600\r\n'
b'13,7.00,USD,0,0,0\r\n'
b'14,8.00,USD,0,1,3600\r\n'

fare_rules.txt
b'fare_id,origin_id,destination_id\r\n'
b'1,1,1\