In [3]:
import os
import pandas as pd
# Snapshot the entries of the directory the kernel is currently in.
all_files = os.listdir('.')

# Step into "datasets" when it exists here as a directory. Later cells use
# relative paths, so they are resolved against the resulting working directory.
datasets_dir = 'datasets'
has_datasets_dir = datasets_dir in all_files and os.path.isdir(datasets_dir)
if has_datasets_dir:
    os.chdir(datasets_dir)

# Report the contents of the (possibly changed) working directory,
# one entry per line.
datasets_files = os.listdir('.')
print("Files in the 'datasets' subdirectory:")
for filename in datasets_files:
    print(filename)


Files in the 'datasets' subdirectory:
Aranet4 2441A_2024-03-20T16_05_03-0400.csv
aranet4.csv
aranetExp.csv


In [8]:
import os

# Raw sensor export; the path is relative to the current working directory.
filename = '../datasets/Aranet4 2441A_2024-03-20T16_05_03-0400.csv'
if os.path.isfile(filename):
    df = pd.read_csv(filename)
    # Keep the raw frame in `df` untouched and do all further work on a copy.
    aranet4 = df.copy()
    # A bare expression inside an `if` block is never rendered by the notebook
    # (only the last top-level expression of a cell is), so the preview has to
    # be emitted explicitly.
    print(aranet4.head())
else:
    print("File not found: ", filename)


In [9]:
def clean_datetime_column(df, column_name, datetime_format='%d/%m/%Y %I:%M:%S %p'):
    """Replace a text timestamp column with 'Date', 'Time' and 'Datetime' columns.

    The frame is modified in place (and also returned): the source column is
    removed and three columns are appended, in this order:

    * ``Date``     - string formatted as ``%m/%d/%Y``
    * ``Time``     - string formatted as ``%H:%M:%S`` (24-hour clock)
    * ``Datetime`` - the parsed timestamp

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the timestamp column. Mutated in place.
    column_name : str
        Name of the column holding the timestamp text.
    datetime_format : str, optional
        ``strptime`` format of the source column. Defaults to the
        day-first, 12-hour format ``'%d/%m/%Y %I:%M:%S %p'``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df`` (for example because the
        frame was already cleaned and the source column is gone).
    ValueError
        Propagated from ``pd.to_datetime`` when a value does not match
        ``datetime_format``.
    """
    if column_name not in df.columns:
        raise KeyError(
            f"Column {column_name!r} not found; has this frame already been cleaned?"
        )

    # Parse once and derive everything from the parsed values. Re-parsing the
    # formatted strings is redundant and breaks on missing timestamps, which
    # would otherwise be stringified before the second parse.
    parsed = pd.to_datetime(df[column_name], format=datetime_format)

    # Nothing is written to `df` until parsing has succeeded, so a bad value
    # leaves the caller's frame untouched.
    df['Date'] = parsed.dt.strftime('%m/%d/%Y')
    df['Time'] = parsed.dt.strftime('%H:%M:%S')
    df['Datetime'] = parsed

    # Remove the original text column from the caller's frame.
    del df[column_name]
    return df

# Clean the date-time column, starting from a fresh copy of the raw frame `df`
# on every run. Cleaning removes the source timestamp column, so feeding the
# already-cleaned `aranet4` back in would raise KeyError when this cell is
# re-executed.
aranet4 = clean_datetime_column(df.copy(), 'Time(DD/MM/YYYY h:mm:ss A)')
# Discard the first four rows.
# NOTE(review): the reason for skipping these rows is not recorded here - confirm.
aranet4 = aranet4.drop(aranet4.index[:4])
aranet4.head(10)

Unnamed: 0,Carbon dioxide(ppm),Temperature(°F),Relative humidity(%),Atmospheric pressure(hPa),Date,Time,Datetime
4,1014,77.8,47.0,1007.3,01/12/2024,16:08:15,2024-01-12 16:08:15
5,884,78.5,46.0,1007.3,01/12/2024,16:09:15,2024-01-12 16:09:15
6,914,79.7,43.0,1007.4,01/12/2024,16:10:15,2024-01-12 16:10:15
7,906,79.7,42.0,1007.2,01/12/2024,16:11:15,2024-01-12 16:11:15
8,896,79.3,41.0,1007.3,01/12/2024,16:12:15,2024-01-12 16:12:15
9,862,78.6,41.0,1007.1,01/12/2024,16:13:15,2024-01-12 16:13:15
10,917,78.1,41.0,1007.2,01/12/2024,16:14:15,2024-01-12 16:14:15
11,897,77.5,41.0,1007.1,01/12/2024,16:15:15,2024-01-12 16:15:15
12,883,76.9,41.0,1007.2,01/12/2024,16:16:15,2024-01-12 16:16:15
13,893,76.5,42.0,1007.1,01/12/2024,16:17:15,2024-01-12 16:17:15


In [10]:
# Persist the cleaned frame next to the raw export; the row index is not written.
aranet4.to_csv(path_or_buf='../datasets/aranet4.csv', index=False)

In [11]:
import sqlalchemy as sql
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Text, delete, insert, DateTime, Float, Boolean, select, distinct, inspect, Time
from datetime import datetime

# Read the connection string from the DATABASE_URL environment variable
# (credentials stay out of the notebook).
DATABASE_URL = os.environ.get("DATABASE_URL")
if not DATABASE_URL:
    # Fail with an actionable message instead of an AttributeError raised by
    # calling .startswith on None further down.
    raise RuntimeError(
        "DATABASE_URL environment variable is not set; "
        "export it before running this cell."
    )

# Some providers hand out URLs with the legacy "postgres://" scheme, which
# SQLAlchemy 1.4+ no longer recognises; rewrite only the leading scheme.
if DATABASE_URL.startswith("postgres://"):
    DATABASE_URL = DATABASE_URL.replace("postgres://", "postgresql://", 1)

# Create the database engine
engine = sql.create_engine(DATABASE_URL)

# MetaData no longer accepts `bind=` as of SQLAlchemy 2.0 (deprecated in 1.4);
# create it unbound and pass `engine` explicitly wherever a connection is needed.
metadata = MetaData()


In [13]:
# Write the cleaned frame to the `aranet4` table, replacing the table if it
# already exists; the row index is not stored. Left as the cell's final
# expression so the value returned by to_sql is displayed.
aranet4.to_sql(name='aranet4', con=engine, if_exists='replace', index=False)


857

In [17]:
# Rebind `metadata` to a new MetaData object created with no arguments; this
# replaces whatever object the database-setup cell assigned to the same name.
metadata = MetaData()
# Query
def query(sql_query, con=None):
    """Run a SQL query and return the result as a DataFrame with a 1-based index.

    Parameters
    ----------
    sql_query : str
        SQL text passed to ``pd.read_sql_query``.
    con : optional
        Connection or engine to run the query on. When omitted, the
        notebook-level ``engine`` is used, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        Query result whose row labels are shifted up by one, so the first
        row is labelled 1 instead of 0.
    """
    # Resolve the global lazily so the function can be defined (and used with
    # an explicit `con`) before `engine` exists.
    connection = engine if con is None else con
    data = pd.read_sql_query(sql_query, connection)
    # Shift the default 0-based row labels so numbering starts at 1.
    data.index = data.index + 1
    return data

# Select every column and row of the `aranet_lecture_data` table.
# NOTE(review): this reads `aranet_lecture_data`, not the `aranet4` table
# written by the earlier to_sql cell - confirm that is the intended source.
q = "\nSELECT *\nFROM aranet_lecture_data\n"

# Fetch the result and save it as CSV without the row index.
aranetExp = query(sql_query=q)
aranetExp.to_csv(path_or_buf='../datasets/aranetExp.csv', index=False)
