In [1]:
import math
import time
from datetime import timedelta

import numpy as np
import pandas as pd
from arango import ArangoClient

In [2]:
# Connect to the ArangoDB server on localhost and open the built-in "_system"
# database as root (used below to check that the target database exists).
# NOTE(review): the root password is hardcoded; consider reading it from an
# environment variable or getpass before sharing this notebook.
client = ArangoClient(hosts="http://localhost:8529")
sys_db = client.db("_system", username="root", password="client")


In [3]:
sys_db.has_database("seminar2")

True

In [4]:
# Open the "seminar2" database; the collection helpers and test cells below
# all go through this handle.
# NOTE(review): same hardcoded root password as the "_system" connection.
db = client.db("seminar2", username="root", password="client")

# Data import and datetime preparation 

Import data from .csv files and convert the timestamps from strings to datetime data type 

## Important note

Converting the timestamps into datetime objects causes an error when inserting them into collection documents: the documents are sent as JSON, and at the time of writing this program does not support inserting datetime objects into the server.

In [5]:
def _read_indexed_csv(filename):
    """Load one sensor CSV, using its first column as the row index."""
    return pd.read_csv(filename, index_col=0)


# One DataFrame per data origin.
waage_df = _read_indexed_csv("waage_df.csv")
heiz_df = _read_indexed_csv("ir_temp_heiz_df.csv")
qual_df = _read_indexed_csv("ir_temp_qual_df.csv")
kamera_df = _read_indexed_csv("kamera_df.csv")
couple_df = _read_indexed_csv("thermo_couple_heiz_df.csv")
umgebung_df = _read_indexed_csv("umgebung_df.csv")


In [59]:
# Cast the air-quality index column to float; all other columns keep their dtype.
umgebung_df = umgebung_df.astype({"env_iaq_index": float})

In [60]:
umgebung_df.head()

Unnamed: 0,uuid,timestamp_start,env_temperature,env_humidity,env_iaq_index,env_pressure
0,54f145d2-e871-4bb9-8f9f-ee72e2ced9cd,2022-01-27 09:15:54.499085,20.89,28.28,92.0,1006.69
1,a3ec2e1f-fa73-4c7c-9b2b-76c11e607ded,2022-01-27 09:17:32.314592,20.92,28.28,92.0,1006.72
2,3fad8ca6-c3f5-4646-8d13-4d12c23bce4b,2022-01-27 09:19:40.265449,20.95,28.12,89.0,1006.71
3,c047f085-5295-487c-983b-822973db31d8,2022-01-27 09:21:17.686957,20.96,28.13,88.0,1006.71
4,d635cbe2-f5dc-4bc2-af97-b7dfb4f52dbc,2022-01-27 09:22:55.582707,21.0,28.08,88.0,1006.69


In [57]:
# Show the Python type of the first row's value in every column.
for column_name in umgebung_df.columns:
    first_value = umgebung_df.loc[0, column_name]
    print(type(first_value))

<class 'str'>
<class 'str'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.int64'>
<class 'numpy.float64'>


# Send data to database

### Generate collections for each data origin:

- ir_temp_heiz
- ir_temp_qual
- kamera
- thermo_couple_heiz
- umgebung
- waage

## Testing

In [None]:
waage_df.iloc[0,1]

In [None]:
type(heiz_df.iloc[0,1])

In [None]:
# The timestamp columns are read from CSV as strings, and subtracting two
# strings raises TypeError. Parse both values first; the result is a Timedelta.
pd.to_datetime(waage_df.iloc[0, 1]) - pd.to_datetime(heiz_df.iloc[0, 1])

In [None]:
umgebung_df.iloc[0,0]

In [33]:
waage_df.loc[0, "uuid"]

'54f145d2-e871-4bb9-8f9f-ee72e2ced9cd'

In [37]:
get_object(waage_df, 2, "Waage")

{'_key': 'WM2',
 'uuid': '3fad8ca6-c3f5-4646-8d13-4d12c23bce4b',
 'timestamp_qual_start': '2022-01-27 09:20:07.455048',
 'qual_weight': 417.0}

In [51]:
#create_coll_from_df(waage_df, "waage")

# Functions and error messages

## Function 1
Accepts (dataframe, name_string), creates a collection named _name_string_, and creates a document for each row in the dataframe.

In [49]:
def create_coll_from_df(df, name_string):
    """Create collection ``name_string`` and insert one document per row of ``df``.

    If the collection already exists, the user is asked interactively whether
    it should be deleted and recreated empty. If it does not exist yet, it is
    created before any document is inserted.

    Args:
        df: DataFrame whose rows are turned into documents via ``get_object``.
        name_string: Name of the target collection; also passed to
            ``get_object``, which uses its first character in each ``_key``.

    Returns:
        ``"worked"`` once every row has been inserted, or ``err_existing_col``
        when the collection exists and the user declines to replace it.
    """
    if db.has_collection(name_string):
        print("Collection already exists. Would you like to delete it and create an empty one?")
        if input("y/n:") != "y":
            return err_existing_col
        db.delete_collection(name_string)
        db.create_collection(name_string)
        print("collection created from scratch")
    else:
        # A missing collection used to be left uncreated, so the inserts below
        # targeted a collection that did not exist on the server.
        db.create_collection(name_string)

    col = db.collection(name_string)
    for i in df.index:
        col.insert(get_object(df, i, name_string))
    return "worked"

In [48]:
# Scratch cell: grab a handle to the "heiz" collection. The loop over the
# DataFrame index below is disabled.
col = db.collection("heiz")
#for i in heiz_df.index: 
#    print(i)

In [15]:
# Manual smoke test: insert the document built from row 1 of heiz_df into "heiz".
# NOTE(review): presumably not re-runnable once key "HM1" exists in the
# collection — confirm before using Restart & Run All.
heiz_col = db.collection("heiz")
heiz_col.insert(get_object(heiz_df, 1, "Heiz"))

{'_id': 'heiz/HM1', '_key': 'HM1', '_rev': '_eoFgaX6---'}

In [13]:
get_object(heiz_df, 0, "Heiz")


{'_key': 'HM0',
 'uuid': '54f145d2-e871-4bb9-8f9f-ee72e2ced9cd',
 'timestamp_prod_start': '2022-01-27 09:16:01.750997',
 'timestamp_start': '2022-01-27 09:15:54.499085',
 'prod_obj_temp': 18.4,
 'prod_amb_temp': 19.8,
 'timestamp_prod_end': '2022-01-27 09:16:11.800704',
 'timestamp_end': '2022-01-27 09:16:57.162335',
 'start_date': '2022-01-27',
 'end_date': '2022-01-27'}

## Function 2
Creates the object {...} that is passed to the collection.insert() function. This is necessary because the documents in different collections have different numbers of attributes. For example, a document in the _heiz_ collection has 9 entries, whereas a document in the _waage_ collection has 3 entries (the `_key` attribute is not counted).

In [40]:
def get_object(df, row, collection_name):
    """Build the document dict for one DataFrame row, ready for ``collection.insert()``.

    Args:
        df: Source DataFrame; every column becomes one document attribute.
        row: Index label of the row to convert (looked up with ``df.loc``).
        collection_name: Non-empty name; its first character prefixes the key.

    Returns:
        dict whose ``"_key"`` is ``<first char of collection_name>M<row>``,
        followed by one entry per column in column order. NumPy scalar values
        are converted to the equivalent built-in Python type.
    """
    key_string = collection_name[0] + "M" + str(row)
    obj = {"_key": key_string}
    for column in df.columns:
        value = df.loc[row, column]
        # pandas hands back NumPy scalars (e.g. numpy.int64), which the JSON
        # encoder rejects; .item() yields the matching built-in int/float/bool.
        if isinstance(value, np.generic):
            value = value.item()
        obj[column] = value
    return obj

## Error messages

In [None]:
# Message returned by create_coll_from_df when the target collection already
# exists and the user declines to replace it.
err_existing_col = "Error creating collection, collection already exists"