In [55]:
import os
import boto3
import pandas as pd
from botocore.exceptions import ClientError
from sqlalchemy import create_engine

# S3 object keys of the CSV files to fetch.
FILE_KEYS = [
    'bengals.csv',
    'boyd_receiving.csv',
    'chase_receiving.csv',
    'higgins_receiving.csv',
]
# Bucket that holds the objects listed in FILE_KEYS.
BUCKET_NAME = 'mindex-data-analytics-code-challenge'
# Maps each S3 key to the local temp-file path it is downloaded to.
tempfiles = {}
# Accumulator for the merged result; starts out as an empty frame.
merged_frames = pd.DataFrame()


# Read the S3 access credentials from the environment; either value is None
# when its variable is unset.
access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

# Warn when a credential is missing or empty. The check has to use the names
# assigned just above; only a message is printed and execution continues.
if not access_key_id or not secret_access_key:
    print("AWS environment variables are not set.")

# Build the S3 client from the credentials read from the environment.
client_credentials = {
    'aws_access_key_id': access_key_id,
    'aws_secret_access_key': secret_access_key,
}
s3_client = boto3.client('s3', **client_credentials)

# Probe the bucket with head_bucket and report the outcome; a '404' error
# code is reported as a missing bucket, anything else as a generic error.
try:
    s3_client.head_bucket(Bucket=BUCKET_NAME)
except ClientError as err:
    bucket_missing = err.response['Error']['Code'] == '404'
    if bucket_missing:
        print(f"The bucket '{BUCKET_NAME}' does not exist.")
    else:
        print(f"An error occurred: {err}")
else:
    print(f"The bucket '{BUCKET_NAME}' exists.")

# ET: download each CSV from the bucket, load it into a DataFrame, and fold
# it into merged_frames by joining on the 'Week' column.
for key in FILE_KEYS:
    # Replace any "/" in the key so the local file name has no path separators.
    file_path = f'temp_{key.replace("/", "_")}'
    # Recorded before the download so every key has an entry in tempfiles.
    tempfiles[key] = file_path

    try:
        s3_client.download_file(BUCKET_NAME, key, file_path)
    except Exception as exception:
        print(f"An error occured downloading {key}: {exception}")
        # No local file exists for this key, so there is nothing to read or
        # merge; move on instead of reusing a stale or undefined frame.
        continue
    print(f"File downloaded: {key}")

    try:
        df = pd.read_csv(file_path)
    except Exception as exception:
        print(f"An error occured reading {file_path}: {exception}")
        # Skip the merge: `df` would otherwise be undefined or left over
        # from the previous iteration.
        continue

    if merged_frames.empty:
        # The first successfully loaded frame seeds the accumulator.
        merged_frames = df
        continue

    # Inner join keeps only the weeks present in every frame merged so far.
    merged_frames = pd.merge(merged_frames, df, on='Week', how='inner')

    # Prefix the generic stat columns with the key minus "_receiving.csv"
    # (e.g. 'boyd_receiving.csv' -> 'boyd_Yards', 'boyd_TDs') so the next
    # merge does not collide on 'Yards' / 'TD'.
    to_remove = "_receiving.csv"
    column_prefix = key.replace(to_remove, '')
    merged_frames = merged_frames.rename(
        columns={
            'Yards': f"{column_prefix}_Yards",
            'TD': f"{column_prefix}_TDs",
        }
    )

# Turn the Result column's Win/Loss flags into labels. The column is first
# cast to text, which is why the mapping keys are the strings '1.0' and '0.0'.
win_loss_mapping = {'1.0': 'Win', '0.0': 'Loss'}
result_as_text = merged_frames['Result'].astype(str)
merged_frames['Result'] = result_as_text.replace(win_loss_mapping)

# L
# NOTE(review): no load step follows here; the merged frame is only printed.

print(merged_frames)

# Ask whether to delete the downloaded temp files; keep asking until the
# answer is a recognised yes/no.
while True:
    # Normalise once: surrounding whitespace and letter case are ignored.
    user_input = input("Delete temporary files?").strip().lower()

    if user_input in ("yes", "y"):
        for temp_path in tempfiles.values():
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                # A path is recorded even when its download failed, so the
                # file may not exist; keep going with the remaining files.
                print(f"{temp_path} not found; nothing to delete.")
            else:
                print(f"{temp_path} deleted.")
        break

    if user_input in ("no", "n"):
        print("Exiting program")
        break

    print("Invalid input.")
        

The bucket 'mindex-data-analytics-code-challenge' exists.
File downloaded: bengals.csv
File downloaded: boyd_receiving.csv
File downloaded: chase_receiving.csv
File downloaded: higgins_receiving.csv
     Week Opponent Location Result  boyd_Yards  boyd_TDs  chase_Yards  \
0    REG1      MIN     Home    Win          32         0          101   
1    REG2      CHI     Away   Loss          73         0           54   
2    REG5       GB     Home   Loss          24         0          159   
3    REG6      DET     Away    Win           7         0           97   
4    REG7      BAL     Away    Win          39         0          201   
5    REG8      NYJ     Away   Loss          69         1           32   
6    REG9      CLE     Home   Loss          11         0           49   
7   REG11       LV     Away    Win          49         0           32   
8   REG12      PIT     Home    Win          13         0           39   
9   REG13      LAC     Home   Loss          85         0           52  

Delete temporary files? y


temp_bengals.csv deleted.
temp_boyd_receiving.csv deleted.
temp_chase_receiving.csv deleted.
temp_higgins_receiving.csv deleted.
