In [None]:
import pandas as pd
# The file is semicolon-delimited and uses a decimal comma; row 0 is the header.
laptops = pd.read_csv(
    "laptops.csv",
    sep=';',
    decimal=',',
    header=0,
)

In [None]:
# Quick structural check of the loaded frame (columns, non-null counts, dtypes).
laptops.info()

In [None]:
# Brand is a nominal (unordered) variable restricted to these levels.
brands = [
    'Acer', 'Apple', 'Asus', 'Dell',
    'HP', 'Lenovo', 'Medion', 'Toshiba',
]
laptops['brand'] = pd.Categorical(laptops['brand'], categories=brands)

In [None]:
# Both CPU variables are ordinal: the level lists below define their order.
cpuGen = [
    'Sandy Bridge', 'Ivy Bridge', 'Haswell',
    'Broadwell', 'Skylake', 'Kabylake',
]
cpuTypeLevels = ['i3', 'i5', 'i7']

laptops['cpuGeneration'] = laptops['cpuGeneration'].astype(
    pd.CategoricalDtype(categories=cpuGen, ordered=True)
)
laptops['cpuType'] = pd.Categorical(
    laptops['cpuType'], categories=cpuTypeLevels, ordered=True
)

In [None]:
# FREQUENCIES

In [None]:
# Counts per CPU type; missing values are left out by default.
laptops['cpuType'].value_counts()

In [None]:
# Same counts, but missing values get a row of their own.
laptops['cpuType'].value_counts(dropna=False)

In [None]:
# absolute freqs, ordered by level instead of by count
laptops['cpuType'].value_counts().sort_index()

In [None]:
# Share of each brand as a fraction of all non-missing rows.
laptops['brand'].value_counts(normalize=True)

In [None]:
# relative freqs, as percentages with one decimal
laptops['brand'].value_counts(normalize=True).mul(100).round(1)

In [None]:
# absolute cumulative frequency, accumulated in generation order
laptops['cpuGeneration'].value_counts().sort_index().cumsum()

In [None]:
# cumulative percentages, accumulated in generation order
laptops['cpuGeneration'].value_counts(normalize=True).sort_index().cumsum().mul(100).round(1)

In [None]:
# Frequency table helper: absolute, relative and cumulative frequencies side by side.
def all_freq(x):
    """Build a frequency table for a pandas Series.

    Missing values are counted as a row of their own (``dropna=False``) and
    rows follow the sorted index of the counts, so the cumulative columns
    accumulate in that order.

    Parameters
    ----------
    x : pandas.Series
        The values to tabulate.

    Returns
    -------
    pandas.DataFrame
        One row per distinct value with the columns ``'abs freq'``,
        ``'rel freq'``, ``'abs cum freq'`` and ``'rel cum freq'``. The two
        relative columns are percentages rounded to one decimal.
    """
    # Count once; every other column is derived from these counts.
    t_abs = x.value_counts(dropna=False).sort_index()
    shares = t_abs / t_abs.sum()

    t_rel = (shares * 100).round(1)
    t_abs_cum = t_abs.cumsum()
    # Accumulate the unrounded shares so rounding errors do not add up.
    t_rel_cum = (shares.cumsum() * 100).round(1)

    # The 'rel freq' key used to carry a stray trailing colon ('rel freq:').
    return pd.DataFrame({
        'abs freq': t_abs,
        'rel freq': t_rel,
        'abs cum freq': t_abs_cum,
        'rel cum freq': t_rel_cum,
    })
# Full frequency table for the CPU generation column.
all_freq(laptops['cpuGeneration'])

In [None]:
# CROSS TABLES
# one-dimensional table

In [None]:
# Counts per CPU type in level order, with missing values as their own row.
laptops['cpuType'].value_counts(dropna=False).sort_index()

In [None]:
# multidimensional table

In [None]:
# Brand (rows) against CPU type (columns), with row and column totals.
pd.crosstab(
    index=laptops['brand'],
    columns=laptops['cpuType'],
    margins=True,
)

In [None]:
# Two row keys (brand, CPU type) against RAM, with row and column totals.
pd.crosstab(
    index=[laptops['brand'], laptops['cpuType']],
    columns=laptops['RAM'],
    margins=True,
)

In [None]:
# CLASSES
# Class boundaries every 100 units: 0, 100, ..., 1100.
cutpoints = range(0, 1200, 100)
# Assign every disk-space value to its class, then count per class in class order.
classes = pd.cut(laptops['diskspace'], bins=cutpoints)
classes.value_counts().sort_index()

In [None]:
# PIE CHART

In [None]:
import matplotlib.pyplot as plt
# Number of laptops per RAM size, ordered by size.
x = laptops.RAM.value_counts().sort_index()
# Derive the wedge labels from the sizes actually counted, so labels and wedges
# can never get out of step with the data (a fixed five-item list breaks as soon
# as the set of RAM sizes changes).
# NOTE(review): assumes RAM is numeric and expressed in GB - confirm with laptops.info().
lab = [f'{size:g}GB' for size in x.index]
plt.figure()
plt.pie(x, labels=lab)
plt.title('RAM in laptops')
plt.show()

In [None]:
# BAR GRAPH

In [None]:
# Number of laptops per RAM size, ordered by size.
x = laptops.RAM.value_counts().sort_index()
# Derive the bar labels from the sizes actually counted, so labels and bars
# can never get out of step with the data (a fixed five-item list breaks as soon
# as the set of RAM sizes changes).
# NOTE(review): assumes RAM is numeric and expressed in GB - confirm with laptops.info().
lab = [f'{size:g}GB' for size in x.index]
plt.figure()
plt.bar(lab, x)
plt.title('RAM in laptops')
plt.xlabel('RAM memory (GB)')
plt.ylabel('Number')
plt.show()

In [None]:
# RAM counts for every (brand, CPU type) combination, without totals.
ctC = pd.crosstab([laptops.brand, laptops.cpuType], laptops.RAM, margins=False)
# DataFrame.plot opens its own figure unless it is handed an Axes, so a bare
# plt.figure() beforehand only leaves an empty extra figure behind. Create the
# Axes explicitly and draw into it instead.
fig, ax = plt.subplots()
# The x axis carries (brand, CPU type) pairs, so label it as such.
ctC.plot.bar(ax=ax, title='RAM in laptops', xlabel='Brand / CPU type', stacked=True)
plt.show()

In [None]:
# HISTOGRAM

In [None]:
plt.figure()
# The plotted column is RAM; the x label previously said 'Disk space (GB)'.
laptops.RAM.plot.hist(title='RAM in laptops', xlabel='RAM memory (GB)', ylabel='Frequency', stacked=True)
plt.show()

In [None]:
# Hand-picked class boundaries of unequal width.
cutpoints = [0, 185, 375, 750, 1100]
plt.figure()
laptops['diskspace'].plot.hist(
    bins=cutpoints,
    title='Diskspace in laptops',
    xlabel='Disk space (GB)',
    ylabel='Frequency',
    stacked=True,
)
plt.show()

In [None]:
# FREQUENCY POLYGON

In [None]:
# Same class boundaries as the disk-space histogram.
cutpoints = [0, 185, 375, 750, 1100]
classes = pd.cut(laptops['diskspace'], bins=cutpoints)
# Line plot of the class counts in class order; x keeps the returned Axes.
x = (
    classes
    .value_counts()
    .sort_index()
    .plot()
)
plt.show()

In [None]:
# SPIDER PLOTS

In [None]:
import math
# One spoke per brand; the radius is the number of laptops of that brand.
x = laptops['brand']
t = x.value_counts()
categories = t.index
n = len(t)

# Repeat the first point at the end so the outline closes on itself.
values = t.values.tolist()
values = values + values[:1]
m = max(values)

# Spread the n spokes evenly around the circle, then close the loop as well.
angles = []
for k in range(n):
    angles.append(k / float(n) * 2 * math.pi)
angles = angles + angles[:1]

plt.figure()
ax = plt.subplot(111, polar=True)
# The duplicated closing angle gets no tick of its own.
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, color='grey', size=8)
ax.set_rlabel_position(0)
ax.set_ylim(0, m)
ax.plot(angles, values, linewidth=1, linestyle='solid')
ax.fill(angles, values, 'b', alpha=0.1)
plt.show()

In [None]:
# WORD CLOUD

In [6]:
import wordcloud as wc
import numpy as np
from PIL import Image
def create_word_cloud(text,file_name):
    """Render a word cloud of `text` shaped by a mask image, save it and show it.

    Parameters
    ----------
    text : str
        Text handed to ``WordCloud.generate``.
    file_name : str
        Path of the image used as the mask. The rendered cloud is written
        next to it, under the same file name prefixed with ``WC_``.

    Example
    -------
    Call it from its own cell once ``text`` holds the text to visualise::

        create_word_cloud(text, 'cloud.jpg')
    """
    import os  # local import: only needed to build the output path

    # Copy the pixels out and close the mask file right away instead of
    # leaving the handle open until garbage collection.
    with Image.open(file_name) as mask_image:
        maskArray = np.array(mask_image)

    cloud = wc.WordCloud(background_color='white', max_words=200, mask=maskArray)
    cloud.generate(text)

    # Prefix only the file name, so a mask given with a directory part is
    # saved beside the original rather than under a mangled path.
    directory, base_name = os.path.split(file_name)
    cloud.to_file(os.path.join(directory, 'WC_' + base_name))

    plt.figure()
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
    # The body used to end with a call to create_word_cloud itself, which
    # recursed without end; the call belongs at top level (see Example).

ModuleNotFoundError: No module named 'wordcloud'