In [3]:
import httplib2
import oauth2
import urllib3
import types
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2, psycopg2.extras
import httplib
import base64
import json # For Microsoft Face API
import urllib as urllib # For Microsoft Face API
import time 
import csv
import datetime 

class UpworkDataFormatter:
    
    def __init__(self):
        # Set the present date for collecting the data
        self.current_date = "12/31/2017"
        self.current_date = datetime.datetime.strptime(self.current_date, "%m/%d/%Y")
        
        # File name
        self.actual_rates_file_name = "./csv_files/upwork_actual_rates_2017_12_12.csv"
        self.skilltest_tenure_file_name = "./csv_files/upwork_skilltest_tenure_2017_12_12.csv"
        self.hr_tenure_file_name = "./csv_files/upwork_hr_tenure_2017_12_12.csv"
        self.fp_tenure_file_name = "./csv_files/upwork_fp_tenure_2017_12_12.csv"
        
        self.save_upwork_tenure_to_csv('skilltest_tenure', self.skilltest_tenure_file_name )
        self.save_upwork_tenure_to_csv('hr_tenure', self.hr_tenure_file_name)
        self.save_upwork_tenure_to_csv('fp_tenure', self.fp_tenure_file_name)
        self.save_upwork_tenure_to_csv('actual_rates', self.actual_rates_file_name)
    
    def connect_to_database(self):
        conn = psycopg2.connect("dbname=eureka01")
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        psycopg2.extensions.register_adapter(dict, psycopg2.extras.Json)
        return cur
                
    def save_upwork_tenure_to_csv(self, field, filename):
        cur = self.connect_to_database().execute("SELECT detailed_info FROM upwork_unitedstates_allskills_2017_12_12;")
        
        if field == 'actual_rates':
        
            with open(filename, 'w') as csvfile:
                user_count = 1
                fieldnames = ['user_count','all_rates', 'mean_rate', 'median_rate']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

                for user in cur:
                    try: 
                        hr_tenure = self.calculate_assignment_tenure(user, "hr")
                        all_rates = hr_tenure[3]
                        mean_rate = hr_tenure[4]
                        median_rate = hr_tenure[5]

                        writer.writerow({'user_count': user_count,'all_rates': all_rates, 
                                         'mean_rate': mean_rate, 'median_rate': median_rate})

                    except KeyboardInterrupt:
                        print "We stoppped"
                        break

                    except Error as error:
                        print error
                        writer.writerow({'user_count': user_count,'all_rates': "None", 
                                         'mean_rate': "None", 'median_rate': "None"})
                    print "Finished user {0} for {1}".format(user_count, field)
                    user_count += 1
        
        elif field == 'skilltest_tenure':
            with open(filename, 'w') as csvfile:
                user_count = 1
                fieldnames = ['user_count', field]
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

                for user in cur:
                    try: 
                        field_value = self.calculate_skilltest_tenure(user)

                        writer.writerow({'user_count': user_count, field: field_value})

                    except KeyboardInterrupt:
                        print "We stoppped"
                        break

                    except Error as error:
                        print error
                        writer.writerow({'user_count': user_count, field: "None"})
                        
                    print "Finished user {0} for {1}".format(user_count, field)
                    user_count += 1
                    
        else: #For calculating hr and fp tenure 
            with open(filename, 'w') as csvfile:
                user_count = 1
                fieldnames = ['user_count', field]
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

                for user in cur:
                    try: 
                        value = self.calculate_skilltest_tenure(self, field[:2])
                        writer.writerow({'user_count': user_count, field: value})

                    except KeyboardInterrupt:
                        print "We stoppped"
                        break

                    except Error as error:
                        print error
                        writer.writerow({'user_count': user_count, field: "None"})
                        print "Finished user {0} for {1}".format(user_count, field)
                        user_count += 1
                
    # Calculate the earliest skill test date on Upwork, returns Upwork tenure in days, months and years as list
    def calculate_skilltest_tenure(self, user):
        
        try:
            skilltest_list = user[0]["tsexams"]["tsexam"]
        except:
            return "None"
        
        all_dates = []
        earliest_date = ""
        
        if (type(skilltest_list) is list): 
            for test in skilltest_list:
                date = test['ts_when']
                date_datetime = datetime.datetime.strptime(date, "%m/%d/%Y")
                all_dates.append(date_datetime)
            earliest_date = min(all_dates)
        
        elif (type(skilltest_list) is dict):
            date = skilltest_list['ts_when']
            date_datetime = datetime.datetime.strptime(date, "%m/%d/%Y")
            earliest_date = date_datetime
        
        # Calculate number of months on Upwork
        tenure_days = (self.current_date - earliest_date).days
        tenure_months = tenure_days/30
        tenure_years = tenure_days/365
        return [tenure_days, tenure_months, tenure_years]
    
    # Returns Upwork tenure based on earliest end date for a fixed price or hourly assignment, and average pay for hourly assignments 
    def calculate_assignment_tenure(self, user, hr_or_fp): # Set last attribute to hr or fp 
        try: 
            assignments_list = user[0]["assignments"][hr_or_fp]["job"]
        except:
            return "None"
        
        all_end_dates = []
        earliest_date = ""
        
        if (type(assignments_list) is list): # If more than one assignment reported
            for assignment in assignments_list:
                end_date = assignment["as_to_full"]
                if (end_date == "Present"):
                    end_date = self.current_date
                    end_datetime = end_date
                else:
                    end_datetime = datetime.datetime.strptime(end_date, "%m/%d/%Y")
                
                all_end_dates.append(end_datetime)
            earliest_date = min(all_end_dates)

        elif (type(assignments_list) is dict): # If only one assignment reported
            end_date = assignments_list["as_to_full"]
            if (end_date == "Present"):
                end_date = self.current_date
                end_datetime = end_date
            else:
                end_datetime = datetime.datetime.strptime(end_date, "%m/%d/%Y")
            earliest_date = end_datetime
        
        # Calculate tenure from earliest work date
        tenure_days = (self.current_date - earliest_date).days
        tenure_months = tenure_days/30
        tenure_years = tenure_days/365
        
        # Calculate average pay for hourly assignments
        if hr_or_fp == "hr":
            all_rates = []
            mean_rate = 0
            if (type(assignments_list) is list): # If more than one assignment reported
                for assignment in assignments_list:
                    rate = assignment["as_rate"]
                    rate = float(rate[1:])
                    all_rates.append(rate)
                
                mean_rate = round(np.array(all_rates).mean(),2)
                median_rate = round(np.median(np.array(all_rates)), 2)
                
            elif (type(assignments_list) is dict):
                rate = assignments_list["as_rate"]
                rate = float(rate[1:])
                rate = round(rate, 2)
                mean_rate = rate
                median_rate = rate
                all_rates.append(mean_rate)
        elif hr_or_fp == "fp":
            mean_rate = "None"
            all_rates = "None"
            median_rate = "None"
        
        return [tenure_days, tenure_months, tenure_years, all_rates, mean_rate, median_rate]
        

myObject = UpworkDataFormatter()

TypeError: 'NoneType' object is not iterable