## Importing Libraries
* **pandas**: For data manipulation and analysis
* **sklearn.cluster**: For clustering algorithms 
* **sklearn.model_selection**: For splitting the dataset into training and testing sets 
* **sklearn.tree**: For building decision tree models (`DecisionTreeRegressor`)
* **scipy.cluster.hierarchy**: For hierarchical clustering algorithms 
* **matplotlib.pyplot**: For data visualization 
* **seaborn**: For creating attractive and informative statistical graphics 
* **random**: For generating random numbers 
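* **numpy**: For numerical computing
* **math**: For mathematical helpers such as square roots and infinity
* **os**: For creating the directory the trained models are saved to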

In [2]:
import pandas as pd
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import random as rd
import numpy as np
import math
import os


## Reading in the input data

In [3]:
# Load the data
try:
    data = pd.read_csv("dataset.csv")
except FileNotFoundError:
    print("Could not find the input file. Please check the file path and try again.")
    # Re-raise instead of calling exit(): inside a notebook exit() asks the
    # kernel to shut down, whereas a raised exception simply halts "Run All"
    # at this cell and keeps the traceback visible.
    raise

## Viewing the first few rows of the data

In [4]:
# Display the first five rows as a quick check that the CSV was parsed as expected.
data.head()

Unnamed: 0,device_name,min_consumption,max_consumption,device_age
0,bulb,100.0,120.0,3
1,LED TV 42 INCH,58.0,60.0,5
2,3''belt sander,1000.0,1000.0,4
3,Air cooler,65.0,80.0,4
4,Air purifier,25.0,30.0,3


## Getting values for input_power, working_hours, and output_power

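This code cell randomly generates values for the `input_power`, `working_hours`, and `output_power` columns of the `data` DataFrame: `input_power` is drawn uniformly between `min_consumption` and `max_consumption`, `working_hours` is a random integer from 0 to 24, and `output_power` is drawn uniformly between 70% and 100% of `input_power`.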


In [5]:
# Seed the stdlib RNG so that Restart Kernel -> Run All regenerates exactly the
# same synthetic columns (and therefore the same models) on every run.
SEED = 42
rd.seed(SEED)

min_c = data['min_consumption'].values
max_c = data['max_consumption'].values

# One input-power draw per device, uniform between its min and max consumption.
random_consumption = [rd.uniform(low, high) for low, high in zip(min_c, max_c)]

# Whole number of working hours; rd.randint is inclusive on both ends (0..24).
working_hours = [rd.randint(0, 24) for _ in range(len(min_c))]

# Output power is drawn uniformly between 70% and 100% of the input power.
output_power = [rd.uniform(consumed * 0.70, consumed) for consumed in random_consumption]

data['input_power'] = random_consumption
data['working_hours'] = working_hours
data['output_power'] = output_power
data.head()

Unnamed: 0,device_name,min_consumption,max_consumption,device_age,input_power,working_hours,output_power
0,bulb,100.0,120.0,3,104.751767,1,75.239147
1,LED TV 42 INCH,58.0,60.0,5,59.605196,20,50.145817
2,3''belt sander,1000.0,1000.0,4,1000.0,16,989.940861
3,Air cooler,65.0,80.0,4,78.314242,18,68.466217
4,Air purifier,25.0,30.0,3,29.473341,23,25.438767


## Calculating efficiency metrics

This code cell calculates the `Enhanced_efficiency` and `efficiency` columns of the `data` DataFrame based on the `input_power`, `output_power`, and `min_consumption` columns.

In [6]:
# Enhanced_efficiency: output power as a percentage of the generated input power.
Enhanced_efficiency = [
    delivered / consumed * 100
    for delivered, consumed in zip(output_power, random_consumption)
]
data['Enhanced_efficiency'] = Enhanced_efficiency

# efficiency: output power as a percentage of the minimum consumption, so it
# can exceed 100 whenever the output is above min_consumption.
efficiency = data['output_power'] / data['min_consumption'] * 100
data['efficiency'] = efficiency
data.head()


Unnamed: 0,device_name,min_consumption,max_consumption,device_age,input_power,working_hours,output_power,Enhanced_efficiency,efficiency
0,bulb,100.0,120.0,3,104.751767,1,75.239147,71.826137,75.239147
1,LED TV 42 INCH,58.0,60.0,5,59.605196,20,50.145817,84.129943,86.458305
2,3''belt sander,1000.0,1000.0,4,1000.0,16,989.940861,98.994086,98.994086
3,Air cooler,65.0,80.0,4,78.314242,18,68.466217,87.424988,105.332642
4,Air purifier,25.0,30.0,3,29.473341,23,25.438767,86.311109,101.755069


## Handling infinite and missing values

In [7]:
# Treat +/-inf (e.g. from a division by zero in the previous step) as missing,
# then drop every row that contains a missing value.
infinities = [np.inf, -np.inf]
data = data.replace(infinities, np.nan).dropna()
data.head()

Unnamed: 0,device_name,min_consumption,max_consumption,device_age,input_power,working_hours,output_power,Enhanced_efficiency,efficiency
0,bulb,100.0,120.0,3,104.751767,1,75.239147,71.826137,75.239147
1,LED TV 42 INCH,58.0,60.0,5,59.605196,20,50.145817,84.129943,86.458305
2,3''belt sander,1000.0,1000.0,4,1000.0,16,989.940861,98.994086,98.994086
3,Air cooler,65.0,80.0,4,78.314242,18,68.466217,87.424988,105.332642
4,Air purifier,25.0,30.0,3,29.473341,23,25.438767,86.311109,101.755069


## Adjusting efficiency values



In [8]:
# Work on private copies of the two columns. `Series.values` may be a view of
# the frame's own data (and is read-only under pandas Copy-on-Write), so writing
# into it element by element either mutates `data` implicitly or raises.
List_efficiency = data['efficiency'].to_numpy(dtype=float, copy=True)
List_enhan_efficiency = data['Enhanced_efficiency'].to_numpy(dtype=float, copy=True)

# Move efficiency away from Enhanced_efficiency by the gap between the two:
#   Enhanced_efficiency > efficiency  -> efficiency is lowered by the gap
#   Enhanced_efficiency < efficiency  -> efficiency is raised by the gap
#   Enhanced_efficiency == efficiency -> the gap is zero, nothing changes
# All three cases reduce to the single expression below.
List_efficiency = List_efficiency - (List_enhan_efficiency - List_efficiency)
data['efficiency'] = List_efficiency

# Keep both metrics inside the 0..200 range.
data['efficiency'] = data['efficiency'].clip(lower=0, upper=200)
data['Enhanced_efficiency'] = data['Enhanced_efficiency'].clip(lower=0, upper=200)
data.head()


Unnamed: 0,device_name,min_consumption,max_consumption,device_age,input_power,working_hours,output_power,Enhanced_efficiency,efficiency
0,bulb,100.0,120.0,3,104.751767,1,75.239147,71.826137,78.652158
1,LED TV 42 INCH,58.0,60.0,5,59.605196,20,50.145817,84.129943,88.786667
2,3''belt sander,1000.0,1000.0,4,1000.0,16,989.940861,98.994086,98.994086
3,Air cooler,65.0,80.0,4,78.314242,18,68.466217,87.424988,123.240296
4,Air purifier,25.0,30.0,3,29.473341,23,25.438767,86.311109,117.199029


## Training a DecisionTreeRegressor model

In [10]:
# Baseline model: predict the adjusted `efficiency` column from the three
# consumption features.
y = data['efficiency']
X = data.loc[:, ['min_consumption', 'max_consumption', 'input_power']]

Normal = DecisionTreeRegressor().fit(X, y)
# Score on the training data itself; the value is computed but not displayed
# because it is not the last expression of the cell.
Normal.score(X, y)
print(Normal.feature_importances_)


[0.46107107 0.28211838 0.25681055]


In [15]:
# Features and target used to predict Enhanced_efficiency (the "K-centroid" model).
y = data['Enhanced_efficiency']
X = data.loc[:, ['min_consumption', 'max_consumption', 'input_power']]


## k-centroid hierarchical clustering


In [16]:
import math

def hierarchical_clustering(data, k):
    """Agglomeratively merge points until at most ``k`` clusters remain.

    Every point starts as its own cluster. While more than ``k`` clusters
    exist, the pair with the smallest ``find_centroid_distance`` is merged.

    Args:
        data: Iterable of points; each point is a sequence of coordinates.
        k: Number of clusters to stop at. Must be at least 1 whenever
            ``data`` is non-empty.

    Returns:
        A list of clusters, each cluster being a list of the original points.
        When ``data`` holds ``k`` or fewer points, every point is returned as
        its own single-element cluster.

    Raises:
        ValueError: If ``k`` is below 1 while there are points to cluster, or
            if no pair of clusters has a distance below infinity (for example
            because the coordinates contain NaN).
    """
    clusters = [[point] for point in data]  # Initialize each point as a separate cluster

    # With k < 1 the loop below would run out of pairs to merge and fail with
    # an opaque IndexError, so reject the request up front.
    if clusters and k < 1:
        raise ValueError("k must be at least 1, got {}".format(k))

    while len(clusters) > k:
        min_distance = math.inf
        merge_index = None

        # Find the closest pair of clusters
        for i in range(len(clusters)):
            for j in range(i + 1, len(clusters)):
                dist = find_centroid_distance(clusters[i], clusters[j])
                if dist < min_distance:
                    min_distance = dist
                    merge_index = (i, j)

        if merge_index is None:
            raise ValueError("no pair of clusters has a finite distance; check the data for NaN or inf")

        # Merge the closest pair of clusters. ``second`` is always greater than
        # ``first``, so removing it does not shift the slot at ``first``.
        first, second = merge_index
        merged_cluster = clusters[first] + clusters[second]
        clusters.pop(second)
        clusters[first] = merged_cluster

    return clusters

def find_centroid_distance(cluster1, cluster2):
    """Return the mean Euclidean distance between the points of two clusters.

    Despite the name, the value is the average over every cross-cluster pair
    of points, not the distance between the two centroids. The centroids are
    therefore not computed here.

    Args:
        cluster1: Non-empty sequence of points (each a sequence of coordinates).
        cluster2: Non-empty sequence of points with the same dimensionality.

    Returns:
        The average of the Euclidean distances between each point of
        ``cluster1`` and each point of ``cluster2``.

    Raises:
        ValueError: If either cluster is empty.
    """
    if len(cluster1) == 0 or len(cluster2) == 0:
        raise ValueError("both clusters must contain at least one point")

    total_distance = 0

    # Calculate the distance between all pairs of objects from the two clusters
    for point1 in cluster1:
        for point2 in cluster2:
            total_distance += math.sqrt(sum([(a - b) ** 2 for a, b in zip(point1, point2)]))

    # Calculate the average distance
    avg_distance = total_distance / (len(cluster1) * len(cluster2))
    return avg_distance

def find_centroid(cluster):
    """Return the coordinate-wise mean of the points in ``cluster``.

    Args:
        cluster: Non-empty sequence of points; the first point determines the
            number of dimensions.

    Returns:
        A list with one averaged coordinate per dimension.
    """
    size = len(cluster)
    centroid = []

    # Walk the dimensions one at a time, accumulating that coordinate over
    # every point before dividing by the number of points.
    for axis in range(len(cluster[0])):
        running_total = 0
        for point in cluster:
            running_total += point[axis]
        centroid.append(running_total / size)

    return centroid


In [13]:
# Run the hierarchical clustering algorithm on the raw feature rows,
# stopping once three clusters remain.
feature_points = X.values
clusters = hierarchical_clustering(feature_points, 3)


In [17]:
# Print the clusters, numbered from 1, with one member point per line.
for number, members in enumerate(clusters, start=1):
    print('Cluster {}:'.format(number))
    for member in members:
        print(member)
    print()

Cluster 1:
[100.         120.         104.75176697]
[ 95.        125.         97.2922973]
[100. 100. 100.]
[ 99.678      100.          99.94966096]
[ 99.         100.          99.35529285]
[ 97.8        100.          99.81611273]
[100.         102.         101.88494999]
[95.         98.         97.29519635]
[100.         109.         107.35818995]
[116.         117.         116.52606271]
[120.         130.         127.04674215]
[ 60.         150.          82.82863993]
[ 50.         110.         108.30761491]
[ 65.         125.          92.92958192]
[58.         60.         59.60519556]
[60. 60. 60.]
[59.7        60.         59.83594335]
[60.         70.         62.11094851]
[60.         70.         61.73906214]
[55.         73.         66.79091994]
[60.         80.         63.75790575]
[50.         60.         53.23757757]
[45.         60.         48.63465262]
[30.         60.         52.69361878]
[26.         60.         59.84821687]
[65.        80.        78.3142425]
[70.         80.

## Decision Tree Regressor Model with Enhanced Efficiency


In [19]:
# Print the centroids for Enhanced efficiency, numbered from 1.
for number, members in enumerate(clusters, start=1):
    print('Centroid {}:'.format(number))
    print(find_centroid(members))
    print()

# Fit a decision tree on the same three features to predict Enhanced_efficiency.
# NOTE(review): the clusters/centroids printed above are not passed to this
# model; it is trained on X and y only. Confirm that this is intended.
K_centroid = DecisionTreeRegressor().fit(X, y)
# Training-set score; computed but not displayed.
K_centroid.score(X, y)
print(K_centroid.feature_importances_)

Centroid 1:
[394.64857439446365, 584.621152249135, 479.4868065838145]

Centroid 2:
[8000.0, 14000.0, 13596.646430055524]

Centroid 3:
[4694.444444444444, 8100.0, 6623.092150472269]



## Test input

In [27]:

# Sanity-check both trained models on one hand-written device profile.
# The frame reuses the training column names so that predict() receives the
# same feature layout the models were fitted on.
test_input = pd.DataFrame(
    [[300, 600, 400]],
    columns=['min_consumption', 'max_consumption', 'input_power'],
)
test_output = Normal.predict(test_input)
print("Normal efficiency: ", test_output)
test_output = K_centroid.predict(test_input)
print("Enhanced efficiency: ", test_output)


Normal efficiency:  [82.72207318]
Enhanced efficiency:  [73.0891326]




## Saving models

In [28]:
import pickle

# exist_ok=True tolerates an already-existing directory without the bare
# `except: pass`, which also hid genuine failures such as permission errors.
os.makedirs('pretrained_model', exist_ok=True)

# Context managers make sure each file is flushed and closed, even if
# pickling raises part-way through.
with open('pretrained_model/Normal.pkl', 'wb') as model_file:
    pickle.dump(Normal, model_file)
with open('pretrained_model/K_centroid.pkl', 'wb') as model_file:
    pickle.dump(K_centroid, model_file)
print("Model saved successfully")

Model saved successfully
