In [195]:
import pandas as pd
import sqlite3


In [220]:
def return_table(begin_date2, end_date2):
    # Set up sqlite
    connection = sqlite3.connect('../static/data/sensors_readings_2016_present.db')
    
    # Assemble Query with proper placement of WHERE clause and using parameterized queries
    sql_query = """
    SELECT sensor_id, latitude, longitude, altitude, AVG(pm2) AS avg_pm2, AVG(pm10) AS avg_pm10
    FROM sensors_readings
    WHERE (date BETWEEN ? AND ?)
    GROUP BY sensor_id, latitude, longitude, altitude
    """
    
    # Execute the query with parameter substitution to prevent SQL injection
    df = pd.read_sql_query(sql_query, connection, params=(begin_date2, end_date2))
    
    connection.close()
    return df

In [229]:
df = return_table('2023-09-27','2023-09-30')
df

Unnamed: 0,sensor_id,latitude,longitude,altitude,avg_pm2,avg_pm10
0,77,40.750816,-111.825290,,17.717333,17.952000
1,443,41.224422,-111.968376,,6.603000,7.290167
2,525,40.783870,-111.870140,4891.0,0.902167,1.161167
3,984,40.595387,-111.807755,,1.848500,2.114833
4,3238,40.664246,-111.849230,4331.0,3.000167,3.461167
...,...,...,...,...,...,...
195,169779,40.769947,-111.847015,4758.0,1.978167,2.797833
196,169851,40.902760,-111.872420,4361.0,2.419000,3.307500
197,169867,40.871600,-111.903200,4306.0,2.413000,3.527167
198,174555,40.722404,-111.823616,4621.0,1.970000,2.423667


In [230]:
df_color = pd.read_csv('../static/data/sensor_categories.csv')
df = pd.merge(df,df_color, on = 'sensor_id')

In [231]:
averages = df.groupby('category').mean().reset_index()
averages = averages[['category','avg_pm2','avg_pm10']]
averages.rename(columns = {'avg_pm2':'cat_avg_pm2','avg_pm10':'cat_avg_pm10'}, inplace = True)
averages['cat_avg_pm2'] = round(averages['cat_avg_pm2']).astype('int')
averages['cat_avg_pm10'] = round(averages['cat_avg_pm10']).astype('int')

In [233]:
df['avg_pm2'] = round(df['avg_pm2']).astype('int')
df['avg_pm10'] = round(df['avg_pm10']).astype('int')

df = pd.merge(df,averages, on = 'category')



In [239]:
selected_colors =['red','orange','green','yellow']
df.loc[df['category'].isin(selected_colors)]

Unnamed: 0,sensor_id,latitude,longitude,altitude,avg_pm2,avg_pm10,category,cat_avg_pm2_x,cat_avg_pm10_x,cat_avg_pm2_y,cat_avg_pm10_y
0,77,40.750816,-111.825290,,18,18,red,558,559,558,559
1,443,41.224422,-111.968376,,7,7,red,558,559,558,559
2,525,40.783870,-111.870140,4891.0,1,1,yellow,106,107,106,107
3,984,40.595387,-111.807755,,2,2,green,54,55,54,55
4,3238,40.664246,-111.849230,4331.0,3,3,green,54,55,54,55
...,...,...,...,...,...,...,...,...,...,...,...
195,169779,40.769947,-111.847015,4758.0,2,3,red,558,559,558,559
196,169851,40.902760,-111.872420,4361.0,2,3,orange,136,137,136,137
197,169867,40.871600,-111.903200,4306.0,2,4,green,54,55,54,55
198,174555,40.722404,-111.823616,4621.0,2,2,green,54,55,54,55


In [240]:
df_color.category.unique()

array(['red', 'yellow', 'green', 'orange'], dtype=object)

In [56]:
# Convert the date column to datetime
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%y')

# Now you can sort by this column
df_sorted = df.sort_values(by='date')

In [60]:
df_sorted.groupby('date').count().reset_index()[['date']].to_csv('date_range.csv', index=False)