In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from sklearn.cluster import KMeans
from scipy.spatial import distance_matrix
import os

# Notebook-wide settings: input location and the cluster-count cap.
DIRI = "data"      # directory that holds the input CSV
FILE = "MISO.csv"  # demand file read below
MAX_K = 50         # NOTE(review): presumably the largest k tried with KMeans; confirm against later cells

# Use seaborn's dark theme for every figure drawn later on.
sns.set_style("dark")

# Read the raw table, then print its schema (columns, dtypes, non-null counts).
file_path = os.path.join(DIRI, FILE)
df = pd.read_csv(file_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43848 entries, 0 to 43847
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   date_time             43848 non-null  object
 1   raw demand (MW)       43848 non-null  object
 2   category              43848 non-null  object
 3   cleaned demand (MW)   43848 non-null  int64 
 4   forecast demand (MW)  43848 non-null  object
dtypes: int64(1), object(4)
memory usage: 1.7+ MB


In [3]:
# Parse the timestamp strings, then derive the two pivot keys from them.
df["date_time"] = pd.to_datetime(df["date_time"], format="%Y-%m-%d %H:%M:%S")
df["date"] = df["date_time"].dt.date  # calendar day -> row label
df["hour"] = df["date_time"].dt.hour  # hour of day  -> column label

# Keep only the cleaned demand series and the two keys.
df_dropped = df.drop(
    columns=["raw demand (MW)", "category", "forecast demand (MW)", "date_time"]
)

# Long -> wide: one row per day, one column per hour, cleaned demand as values.
dfp = df_dropped.pivot(
    index="date",
    columns="hour",
    values="cleaned demand (MW)",
)
dfp

hour,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-02,87106,85142,84173,82404,77086,71479,67323,64587,62873,62138,...,76123,78943,81634,83930,86153,88073,89456,90025,89977,88840
2015-07-03,86617,84035,82697,80747,75547,69890,65701,62852,60966,59757,...,69386,73575,76766,79045,80973,82617,83892,84849,85395,84955
2015-07-04,83036,80300,78827,77143,73104,68519,64265,61292,59282,58046,...,65163,69930,73838,76903,78837,80112,81067,81992,82468,82044
2015-07-05,80122,77235,75163,73266,70980,67902,63897,60809,58759,57506,...,65280,70697,75485,79724,83357,86119,88217,89997,91034,90976
2015-07-06,89452,87034,85840,83864,78545,72845,68584,65545,63804,63297,...,82152,86928,91549,95720,99066,101417,102825,103393,103568,102585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-27,92775,90158,87435,84779,80241,75376,71881,68623,66457,65049,...,70598,74987,78882,82403,85428,87714,89482,90820,92175,92660
2020-06-28,91528,88612,85261,82431,78280,73872,70150,66795,64446,62600,...,68517,73430,78632,83308,87021,89960,92379,94583,96385,97033
2020-06-29,95663,92461,89116,86268,81981,77225,73136,69493,67667,66837,...,81965,86656,91145,95283,98608,100928,102091,102233,102332,101735
2020-06-30,100293,97156,93742,90584,85686,80248,75679,72207,69911,68557,...,81780,86211,90787,95025,98711,100839,102202,103084,103259,102523


In [12]:
# Check the row labels of the pivoted frame: one entry per calendar day,
# held as Python date objects (hence the object dtype in the output).
dfp.index

Index([2015-07-02, 2015-07-03, 2015-07-04, 2015-07-05, 2015-07-06, 2015-07-07,
       2015-07-08, 2015-07-09, 2015-07-10, 2015-07-11,
       ...
       2020-06-22, 2020-06-23, 2020-06-24, 2020-06-25, 2020-06-26, 2020-06-27,
       2020-06-28, 2020-06-29, 2020-06-30, 2020-07-01],
      dtype='object', name='date', length=1827)

### Create Distance Matrix
An n x n matrix that contains the distance between each pair of points. The rows will represent "from" and the columns will represent "to" for our MST, but this is arbitrary.

In [13]:
def create_dist_matrix(X):
    """Return the pairwise distance matrix between the rows of ``X``.

    Parameters
    ----------
    X : pandas.DataFrame
        One observation per row; the row labels are reused to label the result.

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are both ``X.index``. Entry
        ``(i, j)`` is the distance between rows ``i`` and ``j`` as computed by
        ``scipy.spatial.distance_matrix`` with its default settings
        (Minkowski with p=2, i.e. Euclidean).
    """
    points = X.values
    labels = X.index
    pairwise = distance_matrix(points, points)
    return pd.DataFrame(pairwise, index=labels, columns=labels)

In [16]:
# Distances between every pair of daily profiles (the rows of dfp); the
# result is a square frame labelled by date on both axes.
weights = create_dist_matrix(dfp)
weights

date,2015-07-02,2015-07-03,2015-07-04,2015-07-05,2015-07-06,2015-07-07,2015-07-08,2015-07-09,2015-07-10,2015-07-11,...,2020-06-22,2020-06-23,2020-06-24,2020-06-25,2020-06-26,2020-06-27,2020-06-28,2020-06-29,2020-06-30,2020-07-01
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-02,0.000000,21353.951742,35145.068374,32680.480780,38293.929871,37286.968273,5248.658305,14860.523847,29176.786441,24046.254864,...,10345.835152,13418.101878,26139.276463,16199.181924,11538.211517,17100.069883,21821.301542,40411.495196,47469.786549,48874.375372
2015-07-03,21353.951742,0.000000,14708.289024,21554.480393,57244.235622,54592.183323,24362.632760,35298.028387,48677.279115,38075.224701,...,18596.488970,11016.128313,13062.374478,19099.098513,28160.012926,24076.943390,26144.335486,59388.046331,65760.987964,66771.874266
2015-07-04,35145.068374,14708.289024,0.000000,19715.193456,69754.857293,68955.178906,38052.584446,48449.612836,62292.230784,51537.496544,...,29718.828106,23652.083523,14345.393233,28049.437463,41532.741109,37336.913544,37197.700870,72837.838484,79643.635502,80486.866165
2015-07-05,32680.480780,21554.480393,19715.193456,0.000000,59079.180470,68316.090572,36659.464944,41543.991226,54775.184756,46271.300403,...,24384.241776,26723.877151,18913.119573,18441.220865,35071.298878,35762.388539,29957.246819,64496.412986,72877.126940,73316.364626
2015-07-06,38293.929871,57244.235622,69754.857293,59079.180470,0.000000,37140.663403,37734.011541,24296.354994,13445.260131,28358.891163,...,41822.068074,50332.924622,60965.242122,43480.432289,29995.535601,42218.523411,40092.558487,13857.958688,25069.191969,25153.422531
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-27,17100.069883,24076.943390,37336.913544,35762.388539,42218.523411,36468.535136,19623.413668,23804.839487,31488.358722,18005.500410,...,22748.343698,20862.216709,33607.073898,24711.247095,14765.360714,0.000000,12594.203468,40171.990765,44380.750839,45388.135124
2020-06-28,21821.301542,26144.335486,37197.700870,29957.246819,40092.558487,44573.194815,25576.273712,25739.507804,32497.114180,19243.330377,...,23239.981282,25430.982148,34417.669401,21691.359524,15423.541487,12594.203468,0.000000,40528.969947,46296.418954,46602.990172
2020-06-29,40411.495196,59388.046331,72837.838484,64496.412986,13857.958688,27933.318475,39350.050216,27678.965786,11929.141168,24775.050858,...,46160.730876,52148.584640,65027.096214,48708.653862,31591.687625,40171.990765,40528.969947,0.000000,11885.643315,12300.600473
2020-06-30,47469.786549,65760.987964,79643.635502,72877.126940,25069.191969,25704.495288,46304.259016,36398.227045,20947.816020,29187.281066,...,54573.070923,58842.413130,72714.164514,57418.583229,39071.770090,44380.750839,46296.418954,11885.643315,0.000000,3614.427341


In [20]:
# Build the complete weighted graph from the square distance matrix.
#
# `weights` is an adjacency-style frame (dates on both the index and the
# columns), so it has to go through from_pandas_adjacency. from_pandas_edgelist
# expects a long table with one row per edge and takes *column names* for
# source/target, so it never read the distances.
#
# Every non-zero entry becomes an edge whose "weight" attribute is the
# distance; that is the attribute nx.minimum_spanning_tree reads by default.
# Zero entries mean "no edge", so the zero diagonal adds no self-loops.
# NOTE(review): two distinct days with identical profiles (distance 0) would
# also end up without an edge between them.
G = nx.from_pandas_adjacency(weights)
G

<networkx.classes.graph.Graph at 0x13197f79850>