In [1]:
import requests
import pandas as pd
from google.cloud import storage
from io import StringIO
from config_variables import var_credentials_location, var_gcs_connector, var_gcs_bronze_bucket



Creating Test URL

In [2]:
# Folder on the TfL cycling data site that holds the strategic-count CSV files
base_url = "https://cycling.data.tfl.gov.uk/ActiveTravelCountsProgramme/1-Strategic%20counts%20(CIO)"

# Sample file used for testing: 2015-Central.csv
year, area = 2015, "Central"

# Files in the folder are named "<year>-<area>.csv"
url = f'{base_url}/{year}-{area}.csv'

In [3]:
# Download the CSV file with a GET request.
# requests has no default timeout, so without one a stalled server would hang the kernel.
response = requests.get(url, timeout=30)

In [4]:
# Stop on an HTTP error (4xx/5xx) so an error page is never parsed as if it were CSV
response.raise_for_status()
print("Success")

# Parse the downloaded CSV text into a DataFrame and preview the first rows
df = pd.read_csv(StringIO(response.text))
df.head()

Success


Unnamed: 0,Year,UnqID,Date,Weather,Time,Day,Round,Dir,Path,Mode,Count
0,2015,CENCY001,04/06/2015,Dry,06:00:00,Weekday,A,Northbound,,Private cycles,3
1,2015,CENCY001,04/06/2015,Dry,06:15:00,Weekday,A,Northbound,,Private cycles,10
2,2015,CENCY001,04/06/2015,Dry,06:30:00,Weekday,A,Northbound,,Private cycles,18
3,2015,CENCY001,04/06/2015,Dry,06:45:00,Weekday,A,Northbound,,Private cycles,39
4,2015,CENCY001,04/06/2015,Dry,07:00:00,Weekday,A,Northbound,,Private cycles,65


In [43]:
def upload_to_gcs(credentials_location: str, bucket: str, object_name: str, df_data: pd.DataFrame, file_name:str) -> None:
    '''
    Save a DataFrame as a local CSV file and upload the same CSV to Google Cloud Storage.

    Args:
        credentials_location: Path to the service-account JSON file used to create the storage client.
        bucket: Name of the destination bucket.
        object_name: Name of the object (blob) to create in the bucket.
        df_data: DataFrame to serialise; written with a header row and without the index.
        file_name: Local path that receives a copy of the CSV.

    Returns:
        None
    '''

    # Serialise once and reuse the text for both the local copy and the upload
    csv_text = df_data.to_csv(index=False, header=True)

    # newline='' writes the text verbatim, with no line-ending translation
    with open(file_name, 'w', newline='', encoding='utf-8') as local_file:
        local_file.write(csv_text)

    storage_client = storage.Client.from_service_account_json(credentials_location)
    gc_bucket = storage_client.bucket(bucket)
    blob = gc_bucket.blob(object_name)
    blob.upload_from_string(csv_text, content_type='text/csv')


In [44]:
def web_to_df(year: int, area: str, timeout: float = 30) -> pd.DataFrame:
    '''
    Download one year/area CSV from the online data source and return it as a DataFrame.

    The file URL is built as "<base_url>/<year>-<area>.csv", where `base_url` is the
    notebook-level variable.

    Args:
        year: Year part of the file name, e.g. 2015.
        area: Area part of the file name, e.g. 'Central'.
        timeout: Seconds to wait for the server before the request is abandoned.

    Returns:
        The downloaded CSV parsed into a DataFrame.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    '''

    url = f'{base_url}/{year}-{area}.csv'

    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page as CSV
    response.raise_for_status()

    return pd.read_csv(StringIO(response.text))


In [45]:
# Fetch the 2015 Central file through web_to_df
test_df = web_to_df(2015, 'Central')

# Keep a local CSV copy, with a header row and without the index
test_df.to_csv("test.csv", index=False, header=True)

# The preview goes last: a notebook only displays the value of a cell's final expression
test_df.head()

In [46]:
# Upload the test DataFrame to the bronze bucket as object "test/test1"
upload_to_gcs(
    credentials_location=var_credentials_location,
    bucket=var_gcs_bronze_bucket,
    object_name="test/test1",
    df_data=test_df,
    file_name="test.csv",
)

In [47]:
# NOTE(review): looks like a leftover debugging print — consider deleting this cell
print("Jeff")

Jeff
