Tujuan Tugas Besar : 
- Melakukan Clustering terhadap data yang ada untuk menentukan kelompok / cluster data.

- Melakukan Regresi (prediksi) terhadap O3 di suatu daerah.

In [1]:
from math import sqrt

import numpy as np
import pandas as pd
import tensorflow as tf
from flask import Flask, request, render_template, url_for, redirect, jsonify
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [2]:
def neural_network(filename):
    """Score the saved Keras model against the 'O3' column of a CSV file.

    The CSV at ``filename`` is loaded, its 'O3' column is split off as the
    target, the remaining columns are standardised with
    ``preprocessing.scale`` and fed to the model stored in
    'ozone-final.model'.

    Returns:
        A tuple ``(mse, rmse, mae, r2)`` where the first three are numeric
        error metrics and ``r2`` is the R^2 score formatted as a string
        with three decimals.
    """
    features = pd.read_csv(filename)
    target = features.pop('O3')

    scaled = preprocessing.scale(features)
    network = tf.keras.models.load_model('ozone-final.model')
    predicted = network.predict(scaled)

    # RMSE is derived from the already computed MSE.
    mse = mean_squared_error(target, predicted)
    rmse = sqrt(mse)
    mae = mean_absolute_error(target, predicted)
    r2_text = format(r2_score(target, predicted), "0.3f")

    return mse, rmse, mae, r2_text

In [None]:
class K_means(object):
    """Plain k-means clustering with uniformly random centroid initialisation.

    After a successful ``train`` call the instance carries two attributes:
    ``centroids`` (array of shape ``(k, n_features)``) and ``labels``
    (the index of the assigned centroid for every input row).
    """

    def train(self, data, k, verbose=0):
        """Cluster ``data`` into ``k`` groups.

        Args:
            data: 2-D array-like with one sample per row.
            k: number of clusters.
            verbose: when 1, progress is printed and scatter plots of the
                first two columns are drawn through the module-level ``plt``
                name; any other value keeps the method silent.

        Returns:
            1 when the centroids stopped moving, 0 when some centroid ended
            up with no assigned samples (``centroids`` / ``labels`` are not
            updated in that case, so the caller may simply retry).
        """
        data = np.asarray(data, dtype=float)
        n_dims = data.shape[1]

        # Per-feature bounds used to draw the initial centroids.
        lows = data.min(axis=0)
        highs = data.max(axis=0)

        if verbose == 1:
            print('Ranges: ')
            print(np.column_stack((lows, highs)))

        # One uniform draw per centroid coordinate, inside the data bounds.
        centroids = np.random.uniform(lows, highs, size=(k, n_dims))

        if verbose == 1:
            print('Centroids: ')
            print(centroids)
            self._plot(data, centroids)

        count = 0
        while True:
            count += 1
            if verbose == 1:
                print('-----------------------------------------------')
                print('Iteration: ', count)

            # Euclidean distance from every sample to every centroid.
            distances = np.zeros((data.shape[0], k))
            for ic, c in enumerate(centroids):
                distances[:, ic] = np.linalg.norm(data - c, axis=1)

            # Assign each sample to its nearest centroid.
            labels = np.argmin(distances, axis=1)

            # Recompute each centroid as the mean of its members.
            new_centroids = np.zeros((k, n_dims))
            for idx in range(k):
                members = data[labels == idx]
                if len(members) == 0:
                    # An empty cluster has no mean; report failure.
                    return 0
                new_centroids[idx] = members.mean(axis=0)

            if verbose == 1:
                self._plot(data, new_centroids, labels)

            # Stop once the centroids moved by less than machine epsilon.
            if np.linalg.norm(new_centroids - centroids) < np.finfo(float).eps:
                # Output is gated on ``verbose`` so that silent callers
                # (e.g. a web request handler) neither print nor plot.
                if verbose == 1:
                    print("DONE!")
                    self._plot(data, new_centroids, labels)
                break

            # Move the centroids to their new positions.
            centroids = new_centroids

        self.centroids = centroids
        self.labels = labels
        if verbose == 1:
            print(labels)
            print(centroids)
        return 1

    def _plot(self, data, centroids, labels=None):
        """Scatter the first two columns of ``data`` with centroids in red."""
        plt.scatter(data[:, 0], data[:, 1], c=labels)
        plt.scatter(centroids[:, 0], centroids[:, 1], c='r')
        plt.show()

    def getAverageDistance(self, data):
        """Return the mean sample-to-centroid distance of every cluster.

        Args:
            data: the samples that were passed to ``train`` (same row order,
                because rows are matched to ``self.labels`` by position).

        Returns:
            1-D array with one entry per centroid; a centroid without
            members gets 0.0.
        """
        data = np.asarray(data, dtype=float)
        dists = np.zeros((len(self.centroids),))
        for ix, centroid in enumerate(self.centroids):
            members = data[self.labels == ix]
            if len(members):
                dists[ix] = np.linalg.norm(members - centroid, axis=1).mean()
        return dists

    def getLabels(self):
        """Return the cluster index assigned to each training sample."""
        return self.labels

In [None]:
def cluster(filename, k):
    """Run k-means with ``k`` clusters on the pollutant columns of a CSV.

    The file is loaded, the 'date' and 'Address' columns are dropped, and
    the columns Longitude, Latitude, O3, SO2, NO2 and CO are handed to
    ``K_means.train``.

    Returns:
        Whatever ``K_means.train`` returns for this data set.
    """
    frame = pd.read_csv(filename).drop(['date', 'Address'], axis=1)

    selected = frame[['Longitude', 'Latitude', 'O3', 'SO2', 'NO2', 'CO']].copy()
    samples = np.array(selected)

    return K_means().train(samples, k)

In [6]:
# Flask application object; the route decorators below register views on it.
ui = Flask(__name__)

# Module-level placeholder list; no code visible in this file reads it.
x = []
@ui.route('/', methods=['POST', 'GET'])
def index():
    """Serve the home page; on POST also show model metrics for a CSV.

    A POST reads the CSV path from the 'myfile' form field, evaluates it
    with ``neural_network`` and renders the metrics together with the file
    contents as an HTML table. Any other request renders the bare page.
    """
    if request.method != 'POST':
        return render_template('home.html')

    filename = request.form['myfile']
    mse, rmse, mae, r2 = neural_network(filename)
    df = pd.read_csv(filename)

    context = {
        'mse_dis': mse,
        'rmse_dis': rmse,
        'mae_dis': mae,
        'r2_dis': r2,
        'tables': [df.to_html(classes='data')],
        'titles': df.columns.values,
    }
    return render_template('home.html', **context)

# The view function is deliberately NOT named ``cluster``: that name belongs
# to the clustering helper defined earlier in this file, and reusing it made
# the view call itself. ``endpoint='cluster'`` keeps the Flask endpoint name
# (and therefore ``url_for('cluster')``) exactly as before.
@ui.route('/cluster', methods=['POST', 'GET'], endpoint='cluster')
def cluster_view():
    """Render the clustering page.

    POST: reads the CSV path ('myfile') and the cluster count ('k') from the
    form, runs the ``cluster`` helper and renders 'data_hasil_cluster.csv'
    as an HTML table. Responds with HTTP 400 when 'k' is missing or is not a
    positive integer.

    GET: renders the page with the selectable column names taken from
    'data_cleaned_avg_cluster2.csv'.
    """
    if request.method == 'POST':
        # NOTE(review): the path comes straight from the client and is opened
        # on the server; restrict it to a known directory before exposing
        # this app beyond local use.
        filename = request.form['myfile']

        # Form values arrive as strings; the clustering code needs an int.
        k = request.form.get('k', type=int)
        if k is None or k < 1:
            return "Parameter 'k' must be a positive integer.", 400

        cluster(filename, k)

        # NOTE(review): nothing visible in this file writes this CSV —
        # confirm where the clustering result is saved.
        df = pd.read_csv('data_hasil_cluster.csv')
        return render_template('cluster.html', tables=[df.to_html(classes='data')], titles=df.columns.values)

    x = pd.read_csv('data_cleaned_avg_cluster2.csv')
    x = x.drop(['date', 'Address', 'Longitude', 'Latitude'], axis=1)
    colours = list(x.columns)
    return render_template('cluster.html', colours=colours)
        
# Start Flask's built-in server only when this file is executed directly.
if __name__ == "__main__":
    ui.run()

* Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off
I0430 09:09:20.445754  4472 _internal.py:122]  * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
I0430 09:09:50.127665  5948 _internal.py:122] 127.0.0.1 - - [30/Apr/2020 09:09:50] "[37mGET / HTTP/1.1[0m" 200 -
I0430 09:11:46.376808 15392 _internal.py:122] 127.0.0.1 - - [30/Apr/2020 09:11:46] "[37mPOST / HTTP/1.1[0m" 200 -
I0430 09:14:00.256744   532 _internal.py:122] 127.0.0.1 - - [30/Apr/2020 09:14:00] "[37mGET / HTTP/1.1[0m" 200 -
