In [1]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import pandas as pd
# Render up to 100 rows and up to 400 characters per cell in displayed frames.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_colwidth', 400)

In [2]:
import joblib
from sklearn.tree import DecisionTreeRegressor

In [3]:
# Load the cleaned lens specs and keep only lenses with an original price
# below 3000 (the resale model is only valid in this range).
specs = pd.read_csv('../data/specs_clean.csv')
# .copy() makes the filtered frame independent of the frame read from disk, so
# the in-place edits and column assignments in later cells operate on a real
# frame rather than on a slice (avoids pandas' SettingWithCopyWarning).
specs = specs[specs['original_price'] < 3000].copy()
len(specs)

311

In [4]:
# Load the two persisted artefacts used for prediction below: the
# preprocessing pipeline first, then the resale-price model.
# NOTE: joblib.load unpickles its input, so only load files you trust.
prep_pipeline = joblib.load('../data/prep_pipeline.pkl')
resale_price_model = joblib.load('../data/resale_price_model.pkl')

### Predict and append resale price for each lens

In [5]:
# List the distinct brand labels present in the filtered specs.
specs['brand'].unique()

array(['leica', 'canon', 'sony', 'samsung', 'fujifilm', 'nikon', 'tamron',
       'olympus', 'venus', 'panasonic', 'tokina', 'pentax', 'samyang',
       'voigtlander', 'sigma', 'nikkor', 'rokinon'], dtype=object)

In [6]:
# Collapse every brand except sony, fujifilm, panasonic and nikon into 'other'.
# Only the `brand` column is rewritten: a frame-wide `specs.replace(...)` would
# also rewrite any other column that happens to contain one of these strings.
# Defining the rule by the brands that are kept (instead of listing the ones to
# drop) also covers brands that are not in the current data.
brands_kept = ['sony', 'fujifilm', 'panasonic', 'nikon']
# Missing brand values are left untouched, as the original replace did.
is_kept = specs['brand'].isin(brands_kept) | specs['brand'].isna()
specs['brand'] = specs['brand'].where(is_kept, 'other')

In [7]:
# Columns handed to the preprocessing pipeline.
# NOTE(review): presumably this must match the columns the pipeline was fitted
# on, including their order - confirm against the training notebook.
feature_cols = ['original_price', 'flen_max', 'flen_min', 'f_min', 'brand', 'announce_date']

x_all = specs.loc[:, feature_cols]
x_all_prepd = prep_pipeline.transform(x_all)

# Store the model's prediction for every lens alongside its specs.
specs['resale_price'] = resale_price_model.predict(x_all_prepd)

In [8]:
import matplotlib.pyplot as plt

# Distribution of the predicted resale price as a fraction of the original price.
resale_ratio = specs['resale_price'] / specs['original_price']

# Explicit figure/axes interface, with a title and axis labels so the figure
# can be read on its own.
fig, ax = plt.subplots()
ax.hist(resale_ratio, bins=20)
ax.set(
    title='Predicted resale price relative to original price',
    xlabel='resale_price / original_price',
    ylabel='Number of lenses',
);

### Append usage scores

In [9]:
# Append one '<usage>_score' column per usage type: the number of rows each
# lens_id has in '../data/usage_<usage>.csv', or 0 when the lens does not appear.
#
# A tuple (not a set) fixes the iteration order, so the score columns are
# appended in the same order on every run; set iteration order over strings can
# change between Python processes. The order below matches the column order in
# the recorded output of this notebook.
usage_opts = ('wildlife', 'portraits', 'low_light', 'landscape')
for usage in usage_opts:
    score_col = usage + '_score'
    usage_stats = pd.read_csv('../data/usage_' + usage + '.csv')
    # One row per lens_id with its occurrence count. Naming the index and the
    # values explicitly keeps the column names independent of how the installed
    # pandas version labels value_counts() output.
    df_ = (
        usage_stats['lens_id']
        .value_counts()
        .rename_axis('lens_id')
        .reset_index(name=score_col)
    )
    # Drop a score column left over from an earlier run of this cell; otherwise
    # the merge would create '_x'/'_y' suffixed duplicates and the fillna below
    # would fail with a KeyError.
    specs = specs.drop(columns=[score_col], errors='ignore')
    specs = pd.merge(specs, df_, on='lens_id', how='left')
    # Lenses absent from the usage file get a score of 0.
    specs[score_col] = specs[score_col].fillna(0)

In [10]:
# Preview the first rows, now including resale_price and the usage score columns.
specs.head()

Unnamed: 0,lens_id,brand,original_price,announce_date,format,flen_min,flen_max,image_stabilization,f_min,elements,...,maximum_magnification,autofocus,weight,sealing,image_href,resale_price,wildlife_score,portraits_score,low_light_score,landscape_score
0,leica_t_55-135_3p5-4p5,other,2095.0,2014,1.33,55.0,135.0,False,3.5,12,...,-1.0,True,500,False,https://4.img-dpreview.com/files/p/TS375x375~products/leica_t_55-135_3p5-4p5/shots/b1ff88457a1f4ee6b289e920d8943673.png,858.333333,0.0,0.0,0.0,0.0
1,canon_70-200_4_is_ii_usm,other,1299.0,2018,2.0,70.0,200.0,True,4.0,20,...,0.27,True,780,True,https://3.img-dpreview.com/files/p/TS375x375~products/canon_70-200_4_is_ii_usm/shots/7ecb678ec914424486ca2ac9d326497a.png,388.571429,4.0,3.0,1.0,2.0
2,sony_fe_50_1p8,sony,247.975,2016,2.0,50.0,-1.0,False,1.8,6,...,0.14,True,186,False,https://3.img-dpreview.com/files/p/TS375x375~products/sony_fe_50_1p8/shots/414d9391e4164c2eb1eeb460f7b2f741.png,153.333333,0.0,8.0,8.0,2.0
3,samsung_10_3p5,other,552.82,2013,1.33,10.0,-1.0,False,3.5,7,...,0.22,True,71,False,https://4.img-dpreview.com/files/p/TS375x375~products/samsung_10_3p5/shots/fd37e35f19714e7c88a8b3dd06753c91.png,337.5,0.0,0.0,0.0,0.0
4,fujifilm_xc_50-230_ii,fujifilm,399.0,2015,1.33,50.0,230.0,True,4.5,13,...,0.2,True,-1,False,https://1.img-dpreview.com/files/p/TS375x375~products/fujifilm_xc_50-230_ii/shots/eeb3eebf26664d0d8e0bb676053fba52.png,170.0,0.0,0.0,4.0,3.0


In [11]:
# Row count after the left merges; compare with the count right after loading.
len(specs)

311

### Restore brand labels

In [12]:
# Rebuild the brand label from lens_id: it is the text before the first
# underscore. This overwrites the 'other' placeholder set earlier.
specs['brand'] = specs['lens_id'].map(lambda lens_id: lens_id.partition('_')[0])

In [13]:
# Preview the frame again after rebuilding the brand column from lens_id.
specs.head()

Unnamed: 0,lens_id,brand,original_price,announce_date,format,flen_min,flen_max,image_stabilization,f_min,elements,...,maximum_magnification,autofocus,weight,sealing,image_href,resale_price,wildlife_score,portraits_score,low_light_score,landscape_score
0,leica_t_55-135_3p5-4p5,leica,2095.0,2014,1.33,55.0,135.0,False,3.5,12,...,-1.0,True,500,False,https://4.img-dpreview.com/files/p/TS375x375~products/leica_t_55-135_3p5-4p5/shots/b1ff88457a1f4ee6b289e920d8943673.png,858.333333,0.0,0.0,0.0,0.0
1,canon_70-200_4_is_ii_usm,canon,1299.0,2018,2.0,70.0,200.0,True,4.0,20,...,0.27,True,780,True,https://3.img-dpreview.com/files/p/TS375x375~products/canon_70-200_4_is_ii_usm/shots/7ecb678ec914424486ca2ac9d326497a.png,388.571429,4.0,3.0,1.0,2.0
2,sony_fe_50_1p8,sony,247.975,2016,2.0,50.0,-1.0,False,1.8,6,...,0.14,True,186,False,https://3.img-dpreview.com/files/p/TS375x375~products/sony_fe_50_1p8/shots/414d9391e4164c2eb1eeb460f7b2f741.png,153.333333,0.0,8.0,8.0,2.0
3,samsung_10_3p5,samsung,552.82,2013,1.33,10.0,-1.0,False,3.5,7,...,0.22,True,71,False,https://4.img-dpreview.com/files/p/TS375x375~products/samsung_10_3p5/shots/fd37e35f19714e7c88a8b3dd06753c91.png,337.5,0.0,0.0,0.0,0.0
4,fujifilm_xc_50-230_ii,fujifilm,399.0,2015,1.33,50.0,230.0,True,4.5,13,...,0.2,True,-1,False,https://1.img-dpreview.com/files/p/TS375x375~products/fujifilm_xc_50-230_ii/shots/eeb3eebf26664d0d8e0bb676053fba52.png,170.0,0.0,0.0,4.0,3.0


In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
specs.describe()

Unnamed: 0,original_price,announce_date,format,flen_min,flen_max,f_min,elements,groups,minimum_focus,maximum_magnification,weight,resale_price,wildlife_score,portraits_score,low_light_score,landscape_score
count,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0
mean,876.319019,2014.5209,1.589389,42.390675,74.099678,2.940193,13.003215,9.845659,0.467878,0.105479,525.041801,499.883208,3.697749,2.382637,2.266881,2.009646
std,658.003478,2.734259,0.405383,43.904838,118.923883,1.185079,4.845016,3.604026,0.409666,0.602226,429.664552,415.647763,21.405274,7.463776,4.233068,3.766992
min,98.72,2010.0,1.0,7.5,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,50.0,0.0,0.0,0.0,0.0
25%,399.0,2012.0,1.33,16.0,-1.0,2.0,10.0,8.0,0.24,0.12,209.0,230.5925,0.0,0.0,0.0,0.0
50%,629.0,2014.0,1.33,25.0,16.0,2.8,13.0,10.0,0.3,0.2,453.0,375.0,0.0,0.0,0.0,0.0
75%,1081.95,2017.0,2.0,50.0,105.0,3.5,16.0,12.0,0.5,0.26,700.0,651.361789,1.0,2.0,3.0,2.0
max,2999.0,2020.0,2.0,300.0,600.0,12.0,32.0,24.0,2.7,5.0,2860.0,2474.25,288.0,93.0,36.0,30.0


### Create final DB

In [14]:
import os
from getpass import getpass
from urllib.parse import quote_plus

from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2

# Write the final `specs` frame to the `results` table of the local Postgres
# database `results_db`, creating the database first if it does not exist.
dbname, tabname, username = 'results_db', 'results', 'ana'

# The password is never stored in the notebook: it is taken from the PGPASSWORD
# environment variable, with an interactive prompt as a fallback.
password = os.environ.get('PGPASSWORD') or getpass(f'Postgres password for {username}: ')

# quote_plus escapes characters such as '@' or '/' that would otherwise break the URL.
engine = create_engine(f'postgresql://{username}:{quote_plus(password)}@localhost/{dbname}')
# repr() of the engine URL masks the password, so it does not end up in the
# saved notebook output.
print(f'Created engine: {engine.url!r}')
if database_exists(engine.url):
  print(f'Database {dbname} found.')
else:
  print(f'Database {dbname} not found. Creating database...')
  create_database(engine.url)
  print('Done.')

# if_exists='replace' drops and recreates the table on every run.
specs.to_sql(tabname, engine, if_exists='replace')
print(f'Wrote {tabname} table to {dbname}.')

Created engine: postgresql://ana:nonsense@localhost/results_db
Database results_db found.
Wrote results table to results_db.
