In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [None]:
# Load the training set from a path relative to the notebook's working directory.
df=pd.read_csv("data/train.csv")

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# One boolean per column: True if that column contains at least one missing value.
df.isnull().any()

In [None]:
# Replace the terse raw column names with descriptive ones.
readable_names = {
    'blue': 'bluetooth',
    'fc': 'front_cam_mp',
    'sc_h': 'screen_ht',
    'sc_w': 'screen_wt',
}
df = df.rename(columns=readable_names)

In [None]:
# Give the primary-camera column a name that matches 'front_cam_mp'.
df = df.rename({'pc': 'back_cam_mp'}, axis='columns')

In [None]:
# Discard the columns that are not used in the rest of this analysis.
unused_columns = ['m_dep', 'mobile_wt', 'px_height', 'px_width']
df = df.drop(unused_columns, axis='columns')

In [None]:
# Column groupings used when inspecting the data.
# NOTE(review): 'clock_speed' is numeric at this point and only becomes
# categorical once it is binned further down — confirm the grouping is intended.
categorical_features = [
    'bluetooth',
    'clock_speed',
    'dual_sim',
    'four_g',
    'three_g',
    'touch_screen',
    'wifi',
]
continuous_features = [
    'battery_power',
    'front_cam_mp',
    'int_memory',
    'n_cores',
    'back_cam_mp',
    'ram',
]
# Kept as a one-element list so it can be passed directly to `columns=`.
TARGET = ['price_range']

In [None]:
# Summary statistics for the columns listed in `continuous_features`.
df[continuous_features].describe()

In [None]:
# Re-check the shape after the rename/drop steps above.
df.shape

In [None]:
# Summary statistics for battery_power.
df['battery_power'].describe()

In [None]:
# Distinct values present in the bluetooth column.
df['bluetooth'].unique()

In [None]:
# Distinct clock-speed values; these are binned into low/mid/high further down.
df['clock_speed'].unique()

In [None]:
# How many rows carry each dual_sim value.
df['dual_sim'].value_counts()

In [None]:
# Distinct values present in the front_cam_mp column.
df['front_cam_mp'].unique()

In [None]:
# How many rows carry each four_g value.
df['four_g'].value_counts()

In [None]:
# Summary statistics for int_memory.
df['int_memory'].describe()

In [None]:
# How many rows have each core count.
df['n_cores'].value_counts()

In [None]:
# Summary statistics for back_cam_mp.
df['back_cam_mp'].describe()

In [None]:
# Summary statistics for ram.
df['ram'].describe()

In [None]:
# Distinct screen heights present in the data.
df['screen_ht'].unique()

In [None]:
# Distinct screen widths present in the data (zero widths are handled further down).
df['screen_wt'].unique()

In [None]:
# Summary statistics for talk_time.
df['talk_time'].describe()

In [None]:
# How many rows carry each three_g value.
df['three_g'].value_counts()

In [None]:
# How many rows carry each touch_screen value.
df['touch_screen'].value_counts()

In [None]:
# NOTE(review): this repeats the four_g count from an earlier cell; one of the two can be removed.
df['four_g'].value_counts()

In [None]:
# How many rows carry each wifi value.
df['wifi'].value_counts()

In [None]:
# Class balance of the target column.
df['price_range'].value_counts()

In [None]:
# Bucket clock speed into three bands: (0, 1] -> 'low', (1, 2] -> 'mid', (2, 3] -> 'high'.
# Values outside (0, 3] would become NaN.
# This overwrites the numeric column, so the cell must not be re-run
# without reloading the data first.
speed_edges = [0, 1, 2, 3]
speed_names = ['low', 'mid', 'high']
df['clock_speed'] = pd.cut(df['clock_speed'], bins=speed_edges, labels=speed_names)



In [None]:
# Check the frame after binning clock_speed.
df.head()

In [None]:
# Encode the clock-speed bands as integers 0/1/2.
# `Series.replace` on a categorical column is deprecated in recent pandas;
# renaming the categories is the supported way to relabel them and yields the
# same categorical column. Labels missing from the mapping pass through
# unchanged, so re-running this cell is harmless.
level_map = {'low': 0, 'mid': 1, 'high': 2}
df['clock_speed'] = df['clock_speed'].cat.rename_categories(level_map)

In [None]:
# Check the frame after encoding clock_speed as 0/1/2.
df.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Bar plot: how many phones have each core count.
core_counts = df['n_cores'].value_counts()
# (x positions, bar heights), ordered by descending frequency.
w_q = (list(core_counts.index), list(core_counts.values))

fig = plt.figure(figsize=(6, 4))
title = fig.suptitle("No. of Cores Frequency", fontsize=14)
fig.subplots_adjust(top=0.85, wspace=0.3)

ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel("No.of Cores")
ax.set_ylabel("Frequency")
ax.tick_params(axis='both', which='major', labelsize=8.5)

cores, frequencies = w_q
bar = ax.bar(cores, frequencies, color='steelblue', edgecolor='black', linewidth=1)

In [None]:
# Pie chart of the share of phones in each clock-speed band.
# value_counts() orders by frequency, so a fixed ['low', 'mid', 'high'] legend
# can name the wrong wedges. Sort by band and label each wedge from the data.
labels = ['low', 'mid', 'high']
band_names = dict(enumerate(labels))  # {0: 'low', 1: 'mid', 2: 'high'}
speed_counts = (
    df['clock_speed']
    .value_counts()
    .sort_index()               # low, mid, high order instead of frequency order
    .rename(index=band_names)   # 0/1/2 -> readable names; other values pass through
)
ax = speed_counts.plot(kind='pie', autopct='%.2f')
ax.legend(speed_counts.index.tolist())
plt.tight_layout()
plt.show()

In [None]:
# 2-D histogram of RAM against price range; colour shows how many phones fall in each bin.
plt.hist2d(df['price_range'], df['ram'], bins=16, cmap='Blues')
# Label the axes so the figure can be read on its own.
plt.xlabel('price_range')
plt.ylabel('ram')
plt.title('RAM vs. price range')
cb = plt.colorbar()
cb.set_label('counts in bin')

In [None]:
# Apply seaborn's "ticks" style to all later plots; color_codes=True remaps
# matplotlib's shorthand colour codes ('b', 'g', 'r', ...) to the seaborn palette.
sns.set(style="ticks", color_codes=True)

In [None]:
# Internal memory per price range (barplot aggregates with its default estimator).
sns.barplot(data=df, x='price_range', y='int_memory')
plt.xticks(rotation=90)

In [None]:

# Grouped bars: phones per price range, split by the touch_screen flag.
touch_palette = {0: "#FF9999", 1: "#FFE888"}
cp = sns.countplot(
    data=df,
    x="price_range",
    hue="touch_screen",
    palette=touch_palette,
)

In [None]:
# Box plot: distribution of internal memory for each core count.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 4))
f.suptitle('No.of Cores - Internal Memory', fontsize=14)

sns.boxplot(data=df, x="n_cores", y="int_memory", ax=ax)

# Slightly transparent axis labels, both at size 12.
label_style = {'size': 12, 'alpha': 0.8}
ax.set_xlabel("No.of cores", **label_style)
ax.set_ylabel("internal memory", **label_style)

In [None]:
# Box plot of battery_power, one box per price_range value.
df.boxplot(column='battery_power',by='price_range')
plt.show()

In [None]:
# Summary statistics for the two screen-dimension columns.
df[['screen_ht', 'screen_wt']].describe()

In [None]:
# Frequency of each screen width; the zero-width rows are investigated below.
df['screen_wt'].value_counts()

In [None]:
# Screen heights of the rows whose recorded width is zero.
zero_width_rows = df['screen_wt'] == 0
df.loc[zero_width_rows, 'screen_ht']

In [None]:
# Widths recorded for phones with a screen height of 18 (spot check of one height).
height_is_18 = df['screen_ht'] == 18
df.loc[height_is_18, 'screen_wt']

In [None]:
# Heights (with repeats) of every row whose width is zero.
arr = df.loc[df['screen_wt'] == 0, 'screen_ht'].tolist()

In [None]:
# Unique heights that have at least one zero-width row.
set(arr)

In [None]:
# Mean known (non-zero) screen width for every height that has zero-width rows.
# The result is used further down to fill in those zero widths.
#
# The earlier version used `pass` where `continue` was intended, so the zero
# widths were averaged in and pulled every mean down. Zeros are excluded here.
known_width = df['screen_wt'] != 0
# Fallback for a height whose rows all have width 0: mean over all known widths.
overall_mean_width = df.loc[known_width, 'screen_wt'].mean()

mean_width = {}
for d in set(arr):
    widths = df.loc[known_width & (df['screen_ht'] == d), 'screen_wt']
    mean = round(float(overall_mean_width if widths.empty else widths.mean()), 2)
    print("Mean width for height", d, "=", mean)
    mean_width[d] = mean

In [None]:
# Cast both screen dimensions to float so fractional mean widths can be stored.
df = df.astype({'screen_ht': float, 'screen_wt': float})

In [None]:
# Confirm both screen columns are now float.
df[['screen_ht','screen_wt']].dtypes

In [None]:
# Keep an independent copy of the frame as it is before the zero widths are filled in.
df_copy = df.copy()

In [None]:
# Distinct heights that still have zero-width rows, most frequent first.
zero_width_heights = df.loc[df['screen_wt'] == 0, 'screen_ht']
x = zero_width_heights.value_counts().index.tolist()

In [None]:
# Replace each zero width with the mean width computed for phones of the same height.
for z in x:
    needs_fill = (df['screen_wt'] == 0.0) & (df['screen_ht'] == z)
    fill_value = mean_width.get(z)
    df['screen_wt'] = np.where(needs_fill, fill_value, df['screen_wt'])

In [None]:
# Re-check the width distribution after the zero widths were filled in.
df['screen_wt'].value_counts()

In [None]:
# Preview the frame after the width fill.
df.head()

In [None]:
# Screen diagonal from height and width: sqrt(ht^2 + wt^2) / 2.54, rounded to 2 decimals.
# NOTE(review): the 2.54 divisor is presumably a cm -> inch conversion; confirm the units.
#
# This was previously four cells that each overwrote `screen_size` in place, so
# re-running any one of them (the sqrt or the division, say) corrupted the
# column. One expression with the same arithmetic is safe to re-run.
screen_diagonal = np.sqrt(df['screen_ht']**2 + df['screen_wt']**2)
df['screen_size'] = (screen_diagonal / 2.54).round(2)

In [None]:
# The raw dimensions are now summarised by screen_size, so drop them.
df = df.drop(columns=['screen_ht', 'screen_wt'])

In [None]:
# Move the target column to the end of the frame.
# This was previously three cells (copy out, drop in place, join back).
# Re-running the drop raised KeyError and re-running the join raised on the
# overlapping column. Done in one step, the cell can be re-run safely.
p = df[TARGET].copy()  # one-column frame holding price_range
df = df.drop(columns=TARGET).join(p)

In [None]:
# Display the extracted target frame.
p