# Load Libraries

In [None]:
import os
import psycopg2
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.dates as mdates
import matplotlib.pylab
import matplotlib.pyplot as plt

from urllib import parse
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database

% matplotlib inline

# Load Brands Mapping

In [None]:
# Execute brands.py inside this notebook's interactive namespace (-i), so the
# names it defines are available to later cells.
# NOTE(review): presumably this is where the `brands` lookup used by the
# brand-cleaning cell is defined -- confirm against brands.py.
%run -i brands.py

# Connect to PostgreSQL

In [None]:
# Register the "postgres" scheme with urllib's netloc-aware schemes.
# Guarded so that re-running the cell does not keep appending duplicates.
if 'postgres' not in parse.uses_netloc:
    parse.uses_netloc.append('postgres')

# Fallback connection URL, kept in two parts.
path_1 = 'URL PATH PART 1'
path_2 ='URL PATH PART 2'

# Prefer a DATABASE_URL environment variable so credentials do not have to be
# written into the notebook; fall back to the literal parts above when unset.
url = parse.urlparse(os.environ.get('DATABASE_URL', path_1 + path_2))

# set up the connection
dbname = url.path[1:]  # strip the leading "/" of the URL path
# urlparse leaves percent-escapes (e.g. "%40" for "@") in the credentials,
# so decode them before handing them to the driver.
username = parse.unquote(url.username) if url.username else url.username
password = parse.unquote(url.password) if url.password else url.password
host = url.hostname
port = url.port

con = psycopg2.connect(database=dbname, user=username,
                       password=password, host=host, port=port)

# Load Data

In [None]:
# Read every row and column of the `items` SQL table into a DataFrame.
sql_query = "\nSELECT *\nFROM items\n"

items = pd.read_sql_query(sql=sql_query, con=con)

In [None]:
# Read every row and column of the `rental_items` SQL table into a DataFrame.
sql_query = (
    "\n"
    "SELECT *\n"
    "FROM rental_items\n"
)

rental_items = pd.read_sql_query(sql=sql_query, con=con)

### Drop unnecessary columns

In [None]:
# Columns of `items` that are dropped as unnecessary. Each label appears
# exactly once.
unused_item_columns = [
    'title', 'description', 'main_image_id', 'updated_at',
    'user_id', 'slug', 'sku', 'fit_description',
    'removed', 'maintenance', 'approved', 'approved_by_id',
    'approved_at', 'year_purchased', 'listing_type', 'neighborhood',
    'delivery_option', 'promoted', 'status', 'time_zone', 'rack',
    'slot', 'review_count', 'review_rating', 'review_fit',
    'admin_notes', 'purchased_fake', 'photo_status',
    'condition', 'model', 'for_sale', 'sale_price',
    'only_for_sale', 'oversized', 'virtual_try_url',
    'designer_id',
]

# Reassign instead of mutating in place, so the step reads as
# "old frame -> new frame" and plays well with method chaining.
items = items.drop(unused_item_columns, axis=1)

In [None]:
# Columns of `rental_items` that are dropped as unnecessary.
unused_rental_columns = [
    'id', 'status', 'created_at', 'updated_at',
    'fit_return', 'refunded', 'fit_return_reason', 'fit_return_notes',
]

rental_items = rental_items.drop(unused_rental_columns, axis=1)

### Produce Orders Table

In [None]:
# Left-join item attributes onto every rental row (rental_items.item_id ->
# items.id), then discard the joined-in `id` key column.
orders = pd.merge(rental_items, items, how='left',
                  left_on='item_id', right_on='id')
orders = orders.drop('id', axis=1)

### Rental Counts

In [None]:
# calculate the number of rentals per item

# Count the non-null `rental_id` values within each `item_id` group.
# Only the column that is needed is aggregated, and the `axis` argument
# (deprecated in recent pandas) is not passed to groupby.
rental_count = (
    orders.groupby('item_id', as_index=False)['rental_id']
          .count()
          .rename(columns={'rental_id': 'rental_count'})
)

# Keep an explicit two-column layout: the key and its count.
rental_count = rental_count[['item_id', 'rental_count']]

### Merge Rental Counts into Items

In [None]:
# merge rental count with items

# Left join on items.id == rental_count.item_id, then drop the redundant key.
items = pd.merge(items, rental_count, how='left',
                 left_on='id', right_on='item_id')
items = items.drop('item_id', axis=1)

# Items with no match in rental_count come back as NaN; store them as 0.
items['rental_count'] = items['rental_count'].fillna(0)

### Rental Revenue

In [None]:
# Rental revenue per item = rental_count * rent_per_week.
# Column-wise multiplication gives the same values as a row-by-row apply,
# without a Python call per row, and also works when `items` is empty.
items['rental_revenue'] = items['rental_count'] * items['rent_per_week']

# Data Cleaning Pipeline

In [None]:
# remove erroneous samples

# Rows carrying the placeholder brand text or the sentinel weekly rent
# are excluded; rows with missing values in either column are kept.
is_placeholder_brand = items['brand'] == "LENDER SUBMISSION FILL IN"
is_sentinel_rent = items['rent_per_week'] == 999985.0

items = items[~is_placeholder_brand & ~is_sentinel_rent].copy()

In [None]:
# items that have not been removed
# receive current date as removal time
# NOTE(review): confirm that the timezone convention of pd.to_datetime('now')
# matches the stored removed_at values.
items['removed_at'] = items['removed_at'].fillna(value=pd.to_datetime('now'))

# fix items with unrealistic rent per week values of 1000000.0:
# replace that value with 15% of the item's cost

has_placeholder_rent = items['rent_per_week'] == 1000000.0

items.loc[has_placeholder_rent, 'rent_per_week'] = (
    0.15 * items.loc[has_placeholder_rent, 'cost']
)

# rental_revenue was derived from rent_per_week before this correction, so
# recompute it; otherwise the corrected items would keep a revenue figure
# based on the 1000000.0 placeholder.
items['rental_revenue'] = items['rental_count'] * items['rent_per_week']

In [None]:
# clean brand names

def _tidy_brand(raw_name):
    """Lower-case a brand name and tidy its spacing.

    Steps, in order: lower-case, trim leading/trailing spaces, replace each
    double space with a single space, turn non-breaking spaces into spaces.
    """
    # NOTE(review): non-breaking spaces are converted last, i.e. after the
    # trim and the double-space replacement -- confirm this order is intended.
    name = raw_name.lower().strip(' ')
    name = name.replace('  ', ' ')
    return name.replace(u'\xa0', u' ')


items['brand'] = items['brand'].apply(_tidy_brand)

# Replace a cleaned name with its entry in `brands` when one exists;
# names without an entry are left unchanged.
items['brand'] = items['brand'].apply(
    lambda name: brands[name] if name in brands else name
)

# Store Final Data

In [None]:
# Write the cleaned items table to items.csv (index included, "\n" line ends).
# `lineterminator` is the current pandas spelling of this keyword; the older
# `line_terminator` spelling is rejected by pandas 2.x.
items.to_csv(path_or_buf='items.csv', sep=',', header=True,
             index=True, index_label=None, mode='w',
             lineterminator='\n')

In [None]:
# Write the orders table to orders.csv (index included, "\n" line ends).
# `lineterminator` is the current pandas spelling of this keyword; the older
# `line_terminator` spelling is rejected by pandas 2.x.
orders.to_csv(path_or_buf='orders.csv', sep=',', header=True,
              index=True, index_label=None, mode='w',
              lineterminator='\n')