In [None]:
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install seaborn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw data set from 'mobile_train.csv' (path is relative to the
# notebook's working directory).
df = pd.read_csv('mobile_train.csv')
# Summary statistics of the freshly loaded frame as a quick sanity check.
df.describe()


In [None]:
# Heatmap of the pairwise correlations between the columns of the raw frame.
plt.figure(figsize=(8, 6))
corr = df.corr()
sns.heatmap(corr)

In [None]:
# Human-readable Bluetooth flag: 'Yes' where blue == 1, otherwise 'No'.
df['bluetooth'] = np.where(df['blue'].eq(1), 'Yes', 'No')

In [None]:
def conv_gen(x):
    """Translate a combined network code into the value stored in 'gen'.

    The notebook feeds this function ``four_g*4 + three_g*3``. The mapping is
    0 -> 2, 3 -> 3, 7 -> 4 and 4 -> 0; any other code raises ``KeyError``.
    """
    codes = (4, 0, 3, 7)
    generations = (0, 2, 3, 4)
    return dict(zip(codes, generations))[x]

def conv_speed(x):
    """Bucket a clock speed into 'low' (< 1), 'medium' (< 2.3) or 'high'.

    Values that compare below neither limit (including NaN) end up as 'high'.
    """
    for upper_limit, label in ((1, 'low'), (2.3, 'medium')):
        if x < upper_limit:
            return label
    return 'high'

def conv_cores(x):
    """Return the name for a core count from 1 ('single') to 8 ('octa').

    Counts outside 1..8 raise ``KeyError``.
    """
    names = ('single', "dual", "triple", "quad", "penta", "hexa", "hepta", "octa")
    return dict(enumerate(names, start=1))[x]

def conv_wifi(x):
    """Randomly pick a Wi-Fi standard letter, weighted by the tier ``x``.

    ``x == -1`` means "no Wi-Fi": the function returns "none" without touching
    the random generator. For every other value a letter is drawn uniformly
    from a pool that always contains ['b', 'b', 'a'] and grows with ``x``, so
    higher tiers draw 'g' and 'n' more often.

    Uses NumPy's global random state (``np.random.choice``), so results are
    only reproducible if that state is seeded by the caller.
    """
    if x == -1:
        return "none"

    pool = ['b', 'b', 'a']
    # (threshold, extra letters) in the order they are appended. The order is
    # significant: np.random.choice picks by position, so reordering the pool
    # would change the result for a given random state.
    tiers = (
        (0, ['a', 'a', 'b', 'a', 'g']),
        (1, ['a', 'a', 'g', 'g', 'n']),
        (0, ['g', 'g', 'g', 'n', 'n', 'n', 'n']),
    )
    for threshold, extra in tiers:
        if x > threshold:
            pool.extend(extra)
    return np.random.choice(pool)

def conv_price(x):
    """Return a random 0/1 bump for values above 15, otherwise 0.

    Above 15 the result is 1 when a uniform draw from NumPy's global
    generator exceeds 0.4, else 0. No random number is drawn for values that
    are not above 15 (including NaN).
    """
    if not x > 15:
        return 0
    return int(np.random.random() > 0.4)

def conv_cam_price(x):
    """Return a price multiplier: 1.1 or 0.95.

    1.1 is returned only when ``x`` is above 15 and a uniform draw from
    NumPy's global generator exceeds 0.4; in all other cases the result is
    0.95. The draw happens only when ``x`` is above 15.
    """
    if x > 15 and np.random.random() > 0.4:
        return 1.1
    return 0.95

def conv_range(x):
    """Map a price onto a class 0..3 using the cut points 697, 839 and 1010.

    A value gets the index of the first cut point it is strictly below;
    values below none of them (including NaN) get 3.
    """
    cut_points = (697, 839, 1010)
    for price_class, upper in enumerate(cut_points):
        if x < upper:
            return price_class
    return len(cut_points)

In [None]:
# Combine the two network flags into one code (four_g weighs 4, three_g
# weighs 3) and translate that code with conv_gen.
df['gen_tmp'] = 4 * df['four_g'] + 3 * df['three_g']
df['gen'] = df.gen_tmp.apply(conv_gen)
df

In [None]:
# Distribution of clock speeds, used to pick the cut points in conv_speed.
plt.hist(df['clock_speed'], bins=25)

In [None]:
# Categorical speed label ('low' / 'medium' / 'high') derived from clock_speed.
df['speed'] = df.clock_speed.apply(conv_speed)
df

In [None]:
# SIM label: 'Single' where dual_sim is 0, 'Dual' for everything else.
df['sim'] = np.where(df['dual_sim'].ne(0), 'Dual', 'Single')
df

In [None]:
# A value of 0 in 'fc' / 'pc' is turned into a missing value; every other
# value is kept as-is.
# np.nan is used instead of the np.NaN alias, which was removed in NumPy 2.0
# and raises AttributeError there.
df['f_camera'] = np.where(df['fc'] == 0, np.nan, df['fc'])
df['camera'] = np.where(df['pc'] == 0, np.nan, df['pc'])
df

In [None]:
# Number of non-missing values in int_memory.
df.int_memory.count()

In [None]:
# Seed NumPy's global generator before the first random draw of the notebook
# so that a top-to-bottom run reproduces the same synthetic values.
np.random.seed(42)
# 'memory' is int_memory scaled by 1000 plus integer-rounded Gaussian noise
# (mean 500, standard deviation 250).
# One noise value per row is needed, hence len(df): count() skips missing
# values and would yield an array shorter than the frame if any were present.
df['memory'] = df.int_memory*1000 + np.round(np.random.normal(500, 250, size=len(df))).astype(int)
df

In [None]:
# Pixel density per axis: pixel count divided by the screen dimension, times
# 2.5 (presumably a rounded cm-to-inch factor; TODO confirm the units of
# sc_w / sc_h).
# A screen dimension of 0 would make the quotient inf, which in turn distorts
# later correlation calculations; such rows get a missing value instead.
df['dpi_w'] = df['px_width'] / df['sc_w'].replace(0, np.nan) * 2.5
df['dpi_h'] = df['px_height'] / df['sc_h'].replace(0, np.nan) * 2.5

df[['dpi_h','dpi_w','px_height','px_width','sc_h','sc_w']]

In [None]:
# Screen label: 'Touch' where touch_screen == 1, otherwise 'LCD'.
df['screen'] = np.where(df['touch_screen'].eq(1), 'Touch', 'LCD')
df

In [None]:
# List all columns currently in the frame (original plus derived ones).
df.columns

In [None]:
# First selection of the columns intended for export. A later cell rebuilds
# df_out with 'price_range_2' added.
df_out = df[[
    'battery_power', 'm_dep', 'mobile_wt',
    'px_height', 'px_width', 'ram',
    'sc_h', 'sc_w', 'talk_time',
    'bluetooth', 'gen', 'speed', 'sim',
    'f_camera', 'camera', 'memory', 'screen', 'wifi',
    'price_range',
]]
df_out

In [None]:
# Distribution of the 'camera' column (zeros were replaced by NaN earlier).
plt.hist(df['camera'])

In [None]:
# Alternative price class: the original class plus a random 0/1 bump that
# conv_price only hands out for camera values above 15.
df['price_range_2'] = df['price_range'].add(df['camera'].apply(conv_price))

In [None]:
# Compare the bumped price class with the original one side by side.
df[['price_range_2',"price_range"]]

In [None]:
# Correlation heatmap after feature engineering.
# By now df also holds string-valued columns ('bluetooth', 'speed', 'sim',
# 'screen'); DataFrame.corr() raises on those with pandas >= 2.0, so the
# correlation is computed on the numeric columns only.
corr2 = df.select_dtypes(include='number').corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr2)

In [None]:
# Export frame: the selected feature columns plus both price classes. It is
# taken as an independent copy so later column additions do not touch df.
df_out = df[[
    'battery_power', 'm_dep', 'mobile_wt',
    'px_height', 'px_width', 'ram',
    'sc_h', 'sc_w', 'talk_time',
    'bluetooth', 'gen', 'speed', 'sim',
    'f_camera', 'camera', 'memory', 'screen', 'wifi',
    'price_range', 'price_range_2',
]].copy()
df_out

In [None]:
# Correlation heatmap of the export frame.
# df_out contains string-valued columns ('bluetooth', 'speed', 'sim',
# 'screen'); DataFrame.corr() raises on those with pandas >= 2.0, so only the
# numeric columns are correlated.
corr_new = df_out.select_dtypes(include='number').corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_new)

In [None]:
# Name the index 'id' so it is written as the 'id' column when exporting.
df_out.index.name = 'id'

In [None]:
#df_out['price'] = (df_out['price_range']+1) * 367 + np.random.triangular(-200,200,300,2000)
#df_out['price'].min()

In [None]:
# Correlation heatmap of the export frame.
# Restricted to numeric columns: df_out also holds string-valued columns
# ('bluetooth', 'speed', 'sim', 'screen'), on which DataFrame.corr() raises
# with pandas >= 2.0.
corr_price = df_out.select_dtypes(include='number').corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_price)

In [None]:
# Scratch draw: 2000 samples from a normal distribution (mean 0.1, sd 0.1).
# The result is only displayed, not stored. The call still advances NumPy's
# global random state, so later random draws depend on this cell having run.
np.random.normal(0.1,0.1,2000)

In [None]:
# Total pixel count (height x width); price_func reads it as 'res'.
df_out['res'] = df_out['px_height'] * df_out['px_width']

In [None]:
def price_func(df):
    """Generate a synthetic price for every row of ``df``.

    The price is a noisy linear combination of the columns ``battery_power``,
    ``res``, ``ram`` and ``gen``. Each row gets its own normally distributed
    coefficient per column, plus a triangular noise term (left -200, mode 200,
    right 300). The sum is divided by 10 and rounded to 2 decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``battery_power``, ``res``, ``ram`` and
        ``gen``.

    Returns
    -------
    pandas.Series
        One price per row, aligned with ``df``'s index.

    Notes
    -----
    Draws from NumPy's global random state, so the output is reproducible only
    if that state is seeded by the caller.
    """
    # Size every random vector from the frame itself. A fixed size of 2000
    # only works for frames with exactly 2000 rows.
    n = len(df)

    # Draw order is kept stable (bat, res, ram, gen, noise) so a given random
    # state always produces the same prices.
    bat = np.random.normal(1.5, 0.5, n)
    res = np.random.normal(0.0006, 0.0002, n)
    ram = np.random.normal(1.2, 0.5, n)
    gen = np.random.normal(1100, 80, n)
    noise = np.random.triangular(-200, 200, 300, n)

    p = df.battery_power*bat + df.res*res + df.ram*ram + df.gen*gen + noise
    return np.round(p/10, 2)

In [None]:
# Attach the synthetic price produced by price_func (it reads battery_power,
# res, ram and gen from df_out) and display the new column.
df_out['price'] = price_func(df_out)
df_out['price']

In [None]:
# Second price variant: the base price times the camera-dependent multiplier
# from conv_cam_price (1.1 or 0.95), rounded to 2 decimals.
df_out['price_2'] = (df_out['price'] * df_out['camera'].apply(conv_cam_price)).round(2)
df_out

In [None]:
# Summary statistics of the export frame, including the generated prices.
df_out.describe()

In [None]:
#prices = df_out['price_range_2']
#df_out.drop('price_range_2',axis=1, inplace=True)

In [None]:
#df_out.to_csv('mobile_prices.csv')

In [None]:
#plt.scatter(df_out.gen,df_out.price)

In [None]:
# Final frame: df_out without the two price-class columns, as its own copy.
df_final = df_out.drop(columns=['price_range', 'price_range_2']).copy()

In [None]:
# Display the final frame.
df_final

In [None]:
# Summary statistics of the final frame.
df_final.describe()

In [None]:
# Recompute a price class (0..3) from the generated price via conv_range.
df_final['price_range'] = df_final['price'].apply(conv_range)

In [None]:
# Replace the wifi column with a randomly drawn standard letter:
# rows whose wifi value is 0 get the marker -1 (conv_wifi turns it into
# "none"); all other rows pass their price class as the tier to conv_wifi.
df_final['wifi_tmp'] = np.where(df_final['wifi'].eq(0), -1, df_final['price_range'])
df_final['wifi'] = df_final.wifi_tmp.apply(conv_wifi)
df_final[['wifi_tmp', 'wifi', 'price']]

In [None]:
# Price distribution for each generated wifi category.
sns.violinplot(data=df_final, x='wifi', y='price')

In [None]:
# Remove the helper columns that were only needed to derive wifi and price.
df_final = df_final.drop(columns=["wifi_tmp", "price_range", "res"])

In [None]:
# Correlation heatmap of the final frame.
# df_final contains string-valued columns ('bluetooth', 'speed', 'sim',
# 'screen', 'wifi'); DataFrame.corr() raises on those with pandas >= 2.0, so
# only the numeric columns are correlated.
corr_final = df_final.select_dtypes(include='number').corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_final)

In [None]:
# Overall distribution of the generated price.
sns.violinplot(data=df_final, x='price')

In [None]:
# Price distribution per wifi category, redrawn on the cleaned final frame.
sns.violinplot(data=df_final, x='wifi', y='price')

In [None]:

# Export everything except 'price_2'; the index is written as column 'id'.
df_final.drop(columns="price_2").to_csv('mobile_price_1.csv')

In [None]:
# Export the second price variant on its own, keyed by the same index.
price_2 = df_final['price_2']
price_2.to_csv('mobile_price_2.csv')

In [None]:
# Round-trip check: read both exported files back in, indexed by 'id', and
# place them side by side.
df_1 = pd.read_csv('mobile_price_1.csv', index_col='id')
df_2 = pd.read_csv('mobile_price_2.csv', index_col='id')
df_3 = pd.concat((df_1, df_2), axis='columns')
df_3.head()

In [None]:
# Ratio of the second price variant to the base price, per row.
df_3['price_diff'] = df_3['price_2'].div(df_3['price'])
df_3

In [None]:
# Price ratio against the camera value.
plt.scatter(df_3['camera'], df_3['price_diff'])