In [1]:
import numpy as np

# Parameters
num_sensors = 100  # number of simulated sensors
hours_per_month = 30 * 24  # 720 hourly readings (a 30-day month)

# Seed NumPy's global RNG so every np.random.* call in the cells below
# yields the same synthetic data after Restart Kernel -> Run All.
random_seed = 42
np.random.seed(random_seed)


In [2]:
# Draw three continuous readings, one value per sensor per hour. The draws
# happen in this order, each from its own uniform range:
#   temperature         -5 to 35   (degrees Celsius)
#   humidity            30 to 90   (percent)
#   energy_consumption   0 to 300  (kWh)
temperature, humidity, energy_consumption = [
    np.random.uniform(lower, upper, size=(num_sensors, hours_per_month))
    for lower, upper in ((-5, 35), (30, 90), (0, 300))
]


In [3]:
# Categorical weather label for every sensor-hour, picked at random from the
# three options (no weights are passed, so np.random.choice samples them
# with equal probability).
weather_conditions = np.random.choice(
    ['sunny', 'cloudy', 'rainy'],
    size=(num_sensors, hours_per_month),
)


In [5]:
# Hourly vehicle count per sensor: integers from 0 up to 499
# (np.random.randint excludes the upper bound, so 500 itself never occurs).
vehicle_count = np.random.randint(0, 500, size=(num_sensors, hours_per_month))

# Hourly average speed per sensor in km/h: floats drawn uniformly from [0, 100).
average_speed = np.random.uniform(0, 100, size=(num_sensors, hours_per_month))


In [6]:
# Air-quality index (AQI) per sensor per hour: floats drawn uniformly from [0, 300).
pollution_levels = np.random.uniform(0.0, 300.0, (num_sensors, hours_per_month))


In [7]:
# Hourly timestamps starting at 2024-01-01 00:00, as a 1-D datetime64[h] array.
# The length is taken from hours_per_month rather than from a hard-coded end
# date, so the time axis always has one entry per column of the sensor arrays.
time_series = (
    np.datetime64('2024-01-01T00', 'h')
    + np.arange(hours_per_month).astype('timedelta64[h]')
)


In [8]:
# Give every sensor its own copy of the hourly time axis:
# one row per sensor, one column per timestamp.
time_series_replicated = np.repeat(time_series[np.newaxis, :], num_sensors, axis=0)

# Stack the six numeric measurements along a new trailing axis. Feature order
# on that axis: temperature, humidity, energy_consumption, vehicle_count,
# average_speed, pollution_levels. The integer vehicle counts end up stored
# as floats, because the stacked array has a single common dtype.
sensor_data = np.stack(
    (
        temperature,
        humidity,
        energy_consumption,
        vehicle_count,
        average_speed,
        pollution_levels,
    ),
    axis=-1,
)


In [9]:
# Show the combined array; NumPy abbreviates the printed form of large arrays with "...".
sensor_data

array([[[ 1.06315159e+01,  8.65117573e+01,  1.55750639e+02,
          2.60000000e+01,  1.28355712e+01,  2.36094960e+02],
        [ 3.12008866e+01,  5.25958088e+01,  2.92325318e+02,
          1.86000000e+02,  6.90024908e+00,  2.40476525e+02],
        [ 1.90866934e+01,  8.84679880e+01,  1.67991284e+02,
          4.80000000e+02,  7.78769212e+01,  8.81356277e+01],
        ...,
        [ 9.81168490e+00,  5.35380047e+01,  8.72807517e+01,
          4.34000000e+02,  3.33804382e+01,  2.54729063e+02],
        [ 2.47384860e+01,  5.49275989e+01,  5.15352689e+01,
          1.11000000e+02,  3.92591762e+01,  2.48636430e+02],
        [ 8.82403175e+00,  7.79233566e+01,  2.12831833e+02,
          4.46000000e+02,  3.98775977e+01,  8.58722758e+01]],

       [[-1.59971384e+00,  5.62835889e+01,  2.32351714e+02,
          2.50000000e+01,  7.09885292e+01,  2.25692680e+02],
        [-3.96476259e-02,  8.79807690e+01,  1.37552661e+02,
          2.74000000e+02,  8.26045084e+01,  2.12885457e+02],
        [ 1.94940

In [10]:
import pandas as pd 

In [11]:
import pandas as pd
import numpy as np

# sensor_data is 3-D: (sensor, hour, feature). With num_sensors = 100 and
# hours_per_month = 720 that is (100, 720, 6).

# Collapse the sensor and hour axes into a single row axis, keeping one
# column per feature. Rows come out sensor by sensor, each sensor's hours in order.
sensor_data_2d = sensor_data.reshape((-1, sensor_data.shape[-1]))

# Build the table with readable column names, in the same order the arrays were stacked.
df = pd.DataFrame(
    sensor_data_2d,
    columns=[
        'temperature',
        'humidity',
        'energy_consumption',
        'vehicle_count',
        'average_speed',
        'pollution_levels',
    ],
)

# Write the table to disk without the row index.
df.to_csv('sensor_data.csv', index=False)

In [12]:
# Load the CSV file 'sensor_data.csv' back into a fresh DataFrame.
data = pd.read_csv(filepath_or_buffer='sensor_data.csv')

In [13]:
# Display the reloaded table.
data

Unnamed: 0,temperature,humidity,energy_consumption,vehicle_count,average_speed,pollution_levels
0,10.631516,86.511757,155.750639,26.0,12.835571,236.094960
1,31.200887,52.595809,292.325318,186.0,6.900249,240.476525
2,19.086693,88.467988,167.991284,480.0,77.876921,88.135628
3,23.144079,40.265149,106.034463,31.0,5.709360,251.961854
4,2.081885,44.100369,259.194895,411.0,50.269949,60.033838
...,...,...,...,...,...,...
71995,17.692570,80.302681,253.285628,489.0,33.466735,45.916903
71996,20.477974,76.974168,244.551249,78.0,42.177356,158.663394
71997,-0.712597,46.959959,79.967625,316.0,94.451194,61.390120
71998,-1.431404,80.852995,184.195059,67.0,88.752279,221.178558


In [14]:
# Readings from the sensor at index 10 for its first 24 hourly steps (all features).
sensor_10_first_day = sensor_data[10, 0:24]


In [14]:
# Display the 24-row slice selected for the sensor at index 10.
sensor_10_first_day

array([[ 33.83818217,  32.43663631, 194.84130986, 472.        ,
         22.29952768, 281.0887651 ],
       [ 31.34821052,  74.12241609,   3.15049955,  81.        ,
         49.44324916,  47.86519429],
       [  2.93153606,  87.84554037, 128.60166661, 282.        ,
         12.49775571, 268.27425774],
       [ 23.25828854,  38.29317679,  91.94266304, 257.        ,
         11.82313351,  52.33058537],
       [ 25.48821485,  53.38715823, 206.9711482 , 137.        ,
         51.94810849, 165.28495443],
       [ 25.357486  ,  83.95796577, 121.41980053, 370.        ,
         24.91864369,  55.64006935],
       [ 18.50975053,  75.26905784, 105.04371134, 382.        ,
          4.98185254, 299.03568544],
       [  7.6368328 ,  38.92625362,  11.19003696, 156.        ,
          3.95147197, 103.10463722],
       [ -4.52744747,  58.84171744,  12.60983812, 416.        ,
          1.27083397, 127.77414038],
       [ 21.25646661,  66.40642551,  26.67976917, 426.        ,
         43.23773423, 263.1

In [None]:
# creating a new dataset using the s