In [1]:
import json, requests, boto3
import pandas as pd
from pandas.io.json import json_normalize

In [2]:
## pipeline configuration

# Location of the raw JSON document; handed to dataviz as its `url`.
url = 'https://raw.githubusercontent.com/localytics/data-viz-challenge/master/data.json'

# Columns kept (in this order) when dataviz.transformData trims the frame.
columnList = [
    'age',
    'device',
    'date',
    'gender',
    'amount',
    'location.state',
]

In [3]:
## class to extract/transform/load 

class dataviz:
    """Small extract/transform/load pipeline for a JSON event export.

    The methods are stateful and are expected to be called in this order:
    ``importData`` -> ``transformData`` -> ``filterData`` ->
    ``aggregateData`` -> ``writeToS3``. Each one reads the result the
    previous one stored on the instance.

    Attributes:
        url: Location passed to ``pandas.read_json`` by ``importData``.
        list: Column names kept by ``transformData`` (in output order).
        df: Working row-level frame; ``None`` until ``importData`` runs.
        df2: Aggregated frame; ``None`` until ``aggregateData`` runs.
    """

    def __init__(self, url, columnList):
        self.url = url
        self.list = columnList
        self.df = None
        self.df2 = None

    def importData(self):
        """Download the JSON document and flatten it into ``self.df``.

        The top-level ``data`` field is expanded to one row per record, and
        the nested ``location`` object of every record is spread into
        ``location.<key>`` columns (the original ``location`` column is
        dropped).
        """
        # Local import: only this method needs it.
        from io import StringIO

        raw = pd.read_json(self.url, orient='columns')

        # Round-trip the 'data' column through JSON so that every record
        # becomes a row. The text is wrapped in StringIO because newer pandas
        # deprecates passing a literal JSON string to read_json.
        # NOTE(review): this second read_json pass is presumably also what
        # parses client_time into datetimes (transformData needs the .dt
        # accessor) - confirm before simplifying.
        records = pd.read_json(StringIO(raw['data'].to_json()), orient='index')

        # NOTE(review): json_normalize is the file-level import from
        # pandas.io.json; recent pandas exposes it as pd.json_normalize -
        # confirm against the installed version.
        location = json_normalize(records['location'].tolist()).add_prefix('location.')
        self.df = records.join(location).drop(['location'], axis=1)

    def transformData(self):
        """Fill missing values, derive ``date`` and keep only ``self.list``.

        Requires ``self.df`` to contain a datetime-like ``client_time``
        column. Missing values in every column are replaced with ``0``.
        The frame previously stored in ``self.df`` is not modified; a new
        frame is stored instead.
        """
        filled = self.df.fillna(0)
        filled['date'] = filled['client_time'].dt.date
        self.df = filled[self.list]

    def filterData(self, gender='F', state='CA'):
        """Keep only the rows matching ``gender`` and ``state``.

        Args:
            gender: Value the ``gender`` column must equal (default ``'F'``).
            state: Value the ``location.state`` column must equal
                (default ``'CA'``).
        """
        keep = (self.df['gender'] == gender) & (self.df['location.state'] == state)
        self.df = self.df.loc[keep]

    def aggregateData(self):
        """Store per-(age, device, date) count and sum of ``amount`` in ``self.df2``.

        ``gender`` takes part in the grouping but is dropped from the result.
        """
        group_cols = ['age', 'device', 'date', 'gender']
        summary = self.df.groupby(group_cols)['amount'].agg(['count', 'sum']).reset_index()
        self.df2 = summary.drop(['gender'], axis=1)

    def writeToS3(self, bucket='dakindre-ecosia', key='total_events.csv',
                  filename='total_events.csv', client=None):
        """Write ``self.df2`` to a CSV file indexed by ``date`` and upload it.

        ``self.df2`` itself is left untouched, so this method can be called
        repeatedly and ``date`` stays available as a regular column.

        Args:
            bucket: Destination S3 bucket name.
            key: Object key to upload to.
            filename: Local path of the CSV file that is written and uploaded.
            client: Object providing ``upload_file(Filename, Bucket, Key,
                ExtraArgs)``. When ``None``, ``boto3.client('s3')`` is used.
        """
        # Index a temporary copy rather than mutating self.df2 in place;
        # the in-place variant made a second call fail with KeyError('date').
        self.df2.set_index('date').to_csv(filename)

        # Send data set to S3 bucket
        if client is None:
            client = boto3.client('s3')

        client.upload_file(
            Filename=filename,
            Bucket=bucket,
            Key=key,
            ExtraArgs={
                'StorageClass': 'STANDARD_IA'
            }
        )

    def testPrintData(self):
        """Print the first 30 rows of the aggregated frame."""
        print(self.df2.head(30))
         

In [4]:
def main():
    """Run the full pipeline once: import, transform, filter, aggregate, upload."""
    pipeline = dataviz(url, columnList)
    # Every stage works on state stored on the pipeline object by the
    # previous one, so the order below matters.
    for stage in (
        pipeline.importData,
        pipeline.transformData,
        pipeline.filterData,
        pipeline.aggregateData,
        pipeline.writeToS3,
    ):
        stage()

In [5]:
# Run the pipeline only when this code is executed directly (notebook cell or
# script), not when it is imported as a module.
if __name__ == "__main__":
    main()