In [None]:
import platform
import matplotlib
from matplotlib import font_manager, rc

# Use the ASCII hyphen for negative tick labels so the minus sign still
# renders after the font is switched to a Korean one.
matplotlib.rcParams['axes.unicode_minus'] = False

# Select a font family per operating system so Korean text is displayed.
system_name = platform.system()
if system_name == 'Windows':
    # Raw string: the backslashes are path separators, not escape sequences
    # (the non-raw form relies on invalid escapes such as "\W" and "\F").
    path = r"c:\Windows\Fonts\malgun.ttf"
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
elif system_name == 'Darwin':  # macOS
    rc('font', family='AppleGothic')
elif system_name == 'Linux':
    rc('font', family='NanumBarunGothic')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Draw matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for the 'retina' figure format.
%config InlineBackend.figure_format = 'retina'

In [None]:
# Read the spreadsheet into a DataFrame and print its (rows, columns) shape.
power_data = pd.read_excel(io='data/시도별_용도별.xls')
print(power_data.shape)

In [None]:
# Show the first five rows, transposed so the columns read as rows.
power_data.head(n=5).transpose()

In [None]:
# Index the table by the '구분' column, then remove the '합계' and '개성'
# rows when they exist (labels that are absent are ignored).
power = (
    power_data
    .set_index('구분')
    .drop(index=['합계', '개성'], errors='ignore')
)

In [None]:
# Stacked horizontal bars for the two selected columns.
see2 = ['서비스업', '제조업']
power[see2].plot.barh(figsize=(10, 6), stacked=True)

In [None]:
# Keep only the two columns used by the rest of the notebook and preview them.
power = power.loc[:, ['서비스업', '제조업']]
power.head()

In [None]:
# Remove the '경기' and '서울' rows, then display how many rows remain.
power = power.drop(index=['경기', '서울'])
n_samples = len(power)
n_samples

In [None]:
# Scatter plot of the two columns, one point per row of `power`.
plt.figure(figsize=(4, 4))
plt.scatter(power['서비스업'], power['제조업'], c='k', marker='o')
plt.xlabel('서비스업')
plt.ylabel('제조업')

# Annotate every point with its index label, nudged slightly off the marker.
# Iterate over the values directly: `power` is indexed by string labels, and
# looking a Series up with an integer key (`series[n]`) depends on a
# positional fallback that pandas has deprecated.
for region, x_val, y_val in zip(power.index, power['서비스업'], power['제조업']):
    plt.text(x_val * 1.03, y_val * 0.98, region)

## 덴드로그램

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage

# Ward-linkage hierarchical clustering on Euclidean distances between rows.
link_dist = linkage(power, method='ward', metric='euclidean')

# Draw the resulting tree, labelling the leaves with the row index.
plt.figure(figsize=(6, 3))
dendrogram(link_dist, labels=power.index)
plt.show()

In [None]:
# Import only the scaler this cell uses instead of a wildcard import.
from sklearn.preprocessing import StandardScaler

# Overwrite both columns of `power` in place with their standardised values.
power[['서비스업', '제조업']] = StandardScaler().fit_transform(power[['서비스업', '제조업']])

# Re-run the Ward clustering on the scaled values and draw the dendrogram.
link_dist = linkage(power, metric='euclidean', method='ward')
dendrogram(link_dist, labels=power.index)
plt.show()

In [None]:
# Import only the scaler this cell uses instead of a wildcard import.
from sklearn.preprocessing import MinMaxScaler

# NOTE: `power` was already rescaled in place by the cell above, so the
# scaler here receives that output rather than the raw values. The result
# again overwrites both columns of `power`.
power[['서비스업', '제조업']] = MinMaxScaler().fit_transform(power[['서비스업', '제조업']])

# Re-run the Ward clustering on the scaled values and draw the dendrogram.
link_dist = linkage(power, metric='euclidean', method='ward')
dendrogram(link_dist, labels=power.index)
plt.show()

In [None]:
# Import only the scaler this cell uses instead of a wildcard import.
from sklearn.preprocessing import MaxAbsScaler

# `power` has been rescaled in place by the cells above; after the min-max
# cell its values already lie in [0, 1], so max-abs scaling that output would
# leave it essentially unchanged and this dendrogram would just repeat the
# previous one. Rebuild the unscaled values from `power_data`, keeping only
# the rows that are still present in `power`, and scale those instead.
feature_cols = ['서비스업', '제조업']
raw_features = power_data.set_index('구분')[feature_cols]
raw_features = raw_features[raw_features.index.isin(power.index)]
power[feature_cols] = MaxAbsScaler().fit_transform(raw_features)

# Re-run the Ward clustering on the scaled values and draw the dendrogram.
link_dist = linkage(power, metric='euclidean', method='ward')
dendrogram(link_dist, labels=power.index)
plt.show()

In [None]:
# Import only the scaler this cell uses instead of a wildcard import.
from sklearn.preprocessing import RobustScaler

# NOTE: `power` was already rescaled in place by the cells above, so the
# scaler here receives that output rather than the raw values. The result
# again overwrites both columns of `power`, and this is the state the
# clustering cells below operate on.
power[['서비스업', '제조업']] = RobustScaler().fit_transform(power[['서비스업', '제조업']])

# Re-run the Ward clustering on the scaled values and draw the dendrogram.
link_dist = linkage(power, metric='euclidean', method='ward')
dendrogram(link_dist, labels=power.index)
plt.show()

# K-Means

In [None]:
from sklearn.cluster import KMeans
# Fit k-means with three clusters. `random_state` pins the centroid
# initialisation so the labels are the same on every run, and `n_init` is
# given explicitly so the number of restarts does not depend on the
# installed scikit-learn version's default.
k = KMeans(n_clusters=3, n_init=10, random_state=0).fit(power)
print(k.n_clusters)

In [None]:
# Refit with four clusters; this replaces the previous model bound to `k`.
# `random_state` pins the centroid initialisation so the labels (and the
# colours/markers derived from them below) are the same on every run, and
# `n_init` is given explicitly so the number of restarts does not depend on
# the installed scikit-learn version's default.
k = KMeans(n_clusters=4, n_init=10, random_state=0).fit(power)
print(k.n_clusters)

In [None]:
# Append the fitted cluster label of each row as a '클러스터' column and
# preview the first four rows.
power = power.assign(**{'클러스터': k.labels_})
power.iloc[:4]

In [None]:
# Remove the '클러스터' column again so `power` holds only the feature
# columns. Rebinding (rather than dropping in place) together with
# errors='ignore' lets this cell be re-run without raising a KeyError once
# the column is already gone.
power = power.drop(columns='클러스터', errors='ignore')
power.head()

In [None]:
# Keep the fitted model's cluster-centre coordinates for plotting below,
# and display them as the cell output.
centers = k.cluster_centers_  # centre coordinates of each cluster
centers

In [None]:
# Marker and colour lookup tables, indexed by cluster label.
my_markers = ['*', '^', 'o', '^', '.', ',', '1', '2']
my_color = ['r', 'c', 'g', 'b', 'g', 'k', 'r', 'y']
plt.figure(figsize=(5, 4))
plt.xlabel('서비스업')
plt.ylabel('제조업')

# Draw each row with the colour/marker of its cluster and annotate it with
# its index label. Iterate over the values directly: `power` is indexed by
# string labels, and looking a Series up with an integer key (`series[n]`)
# depends on a positional fallback that pandas has deprecated.
for region, x_val, y_val, label in zip(
        power.index, power['서비스업'], power['제조업'], k.labels_):
    plt.scatter(x_val, y_val, c=my_color[label],
                marker=my_markers[label], s=100)
    plt.text(x_val * 1.03, y_val * 0.98, region)

# Overlay the cluster centres as black dots.
for i in range(k.n_clusters):
    plt.scatter(centers[i][0], centers[i][1], c='k', s=100)

In [None]:
# Show the first five rows of the full table again, transposed.
power_data.head(n=5).transpose()

In [None]:
# Start again from the full table indexed by '구분', then remove the listed
# columns and rows. Labels that are not present are ignored.
power2 = (
    power_data
    .set_index('구분')
    .drop(columns=['업무용합계', '산업용합계', '합계'], errors='ignore')
    .drop(index=['합계', '개성', '경기', '서울'], errors='ignore')
)

In [None]:
# Remember the row and column labels of `power2`, plus the integer positions
# of the two columns that are plotted later; display one of the positions.
index_power, column_power = power2.index, power2.columns
manufacture = column_power.tolist().index('제조업')
service = column_power.tolist().index('서비스업')
manufacture

In [None]:
# Import the scaler here so this cell does not rely on a name that only
# entered the namespace through a wildcard import several cells earlier.
from sklearn.preprocessing import StandardScaler

# Standardise every column of `power2`; the scaled values are bound to
# `power3`.
power3 = StandardScaler().fit_transform(power2)

In [None]:
# Show the types of both objects. Written as a tuple because with
# `a; b` only the last expression is displayed and the first is discarded.
type(power3), type(power2)

In [None]:
# Ward-linkage hierarchical clustering on the scaled matrix.
Z = linkage(power3, method='ward', metric='euclidean')

# Draw the tree with the saved row labels on the leaves.
plt.figure(figsize=(10, 5))
plt.title('덴드로그램')
dendrogram(Z, labels=index_power)
plt.show()

In [None]:
# Fit four clusters on the scaled matrix. `random_state` pins the centroid
# initialisation so the labels (and the colours/markers derived from them
# below) are the same on every run, and `n_init` is given explicitly so the
# number of restarts does not depend on the installed scikit-learn version's
# default.
k = KMeans(n_clusters=4, n_init=10, random_state=0).fit(power3)

In [None]:
# Number of rows in the scaled matrix.
n_samples = len(power3)
print(n_samples)

plt.figure(figsize=(5, 4))
plt.xlabel('서비스업')
plt.ylabel('제조업')

# One marker per row, styled by its cluster label and annotated with the
# row's saved index label. Only the two selected columns are plotted.
for label, row, region in zip(k.labels_, power3, index_power):
    x_val = row[service]
    y_val = row[manufacture]
    plt.scatter(x_val, y_val,
                c=my_color[label], marker=my_markers[label], s=100)
    plt.text(x_val * 1.05, y_val * 0.99, region)