In [1]:
import glob
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pymongo import MongoClient

# Connect to database

In [2]:
# Connect to database server
# The connection string contains the account password, so it is read from the
# MONGODB_URI environment variable rather than being written in the notebook.
# NOTE(review): the password that was previously hard-coded here should be rotated.
mongodb_uri = os.environ.get('MONGODB_URI')
if not mongodb_uri:
    raise RuntimeError(
        'Set the MONGODB_URI environment variable to the MongoDB connection string'
    )
client = MongoClient(mongodb_uri)

# Get database
db = client.magnetron

# Get collection
collection = db.experiments

# Data processing

In [3]:
def process_df(df):
    '''
    Converts time stamp column from String datetime to seconds starting from 0
    and rounds values of selected columns (all but PC Source columns) to 3 decimals

    The input df is left unmodified; a processed copy is returned.

    Parameters
    ----------
    df: pandas df of experiment CSV, must contain a 'Time Stamp' column


    Returns
    ---------
    date: string of experiment date (mm/dd/yyyy), taken from the first row
    df: new pandas df with the time column in seconds and all columns except
        the PC Source ones rounded to 3 decimals


    Raises
    ---------
    IndexError: if df has no rows (there is no first time stamp to read)
    '''

    # Work on a copy so the caller's frame is not modified as a side effect
    df = df.copy()

    # Convert to datetime
    timestamps = pd.to_datetime(df['Time Stamp'])

    # Save date of the first sample
    date = timestamps.iloc[0].date().strftime('%m/%d/%Y')

    # Convert datetime to seconds starting from zero
    df['Time Stamp'] = (timestamps - timestamps.iloc[0]).dt.total_seconds()

    # Round every column that is not a PC Source column to three decimals
    # (this includes the converted 'Time Stamp' column)
    rounded_columns = [col for col in df.columns if 'PC Source' not in col]
    df[rounded_columns] = df[rounded_columns].round(3)

    return date, df

# Load files and define parameters

In [6]:
# glob returns matches in arbitrary, filesystem-dependent order, and the upload
# loop pairs files[i] with metadata row i+1 by position, so sort to make that
# pairing reproducible between runs.
# NOTE(review): assumes the metadata rows are listed in sorted-filename order - confirm
files = sorted(glob.glob("../data/*.CSV"))

# Need to specify how we link metadata to CSV files
meta_txt = sorted(glob.glob("../data/*.txt"))
if not meta_txt:
    raise FileNotFoundError("No metadata .txt file found in ../data/")

# Read the first metadata file; the with-block closes the handle afterwards
with open(meta_txt[0], "r") as f:
    metadata = f.readlines()

# First line holds the whitespace-separated metadata column names
metadata_columns = metadata[0].split()

# Enter standard deviation settling threshold
sigma_t = 1

# Enter experiment time threshold (fraction of the rows, used as t*rows)
t = 0.5

# Define steady state condition parameters
p1 = 'Power Supply 1 Voltage'
p2 = 'Power Supply 3 Voltage'
p3 = 'Power Supply 5 DC Bias'

params = [p1,p2,p3]

# Load to database

In [5]:
def _find_settling_index(df, params, sigma_t, t):
    '''
    Finds the first row from which the experiment counts as steady

    The experiment is steady from row j when, for every column in params, the
    standard deviation of rows j..end is at most sigma_t. Only start rows
    below int(t * number of rows) are tried.

    Parameters
    ----------
    df: pandas df returned by process_df
    params: list of column names defining the steady state condition
    sigma_t: standard deviation settling threshold
    t: fraction of the rows within which the steady state must begin

    Returns
    ---------
    j: int row index where the steady state begins, or None if never steady
    '''
    n_threshold = int(t * len(df))

    for j in range(n_threshold):
        if all(df[p].iloc[j:].std() <= sigma_t for p in params):
            return j

    return None


def _settled_statistics(df, j):
    '''
    Builds the calculation dictionary for the settled part of an experiment

    Parameters
    ----------
    df: pandas df returned by process_df
    j: int row index where the steady state begins

    Returns
    ---------
    calc_dict: dict with 'Settling time' plus '<column> Mean' and
               '<column> STD' for every column after the first one
    '''
    settled = df.iloc[j:]
    calc_dict = {'Settling time': df["Time Stamp"].iloc[j]}

    # Skip the first column - presumably 'Time Stamp'; verify against the CSV layout
    for k in df.columns[1:]:
        mean = settled[k].mean()
        std = settled[k].std()

        # Round off values that are not PC Source
        if 'PC Source' not in k:
            mean = np.round(mean, 3)
            std = np.round(std, 3)

        calc_dict[k + ' Mean'] = mean
        calc_dict[k + ' STD'] = std

    return calc_dict


# Iterate through experiments (CSV files)
for i, path in enumerate(files):

    # Read csv (the first two lines of the file are skipped)
    df = pd.read_csv(path, skiprows=[0,1])

    # Create metadata dictionary for experiment
    # (row 0 of metadata is the header, so experiment i uses row i+1)
    meta_dict = dict(zip(metadata_columns, metadata[i+1].split()))

    # Retrieve date and processed df
    date, df = process_df(df)

    # Find where the steady state begins, if it does
    j = _find_settling_index(df, params, sigma_t, t)

    if j is None:
        # Nothing is inserted for such experiments; report it instead of skipping silently
        print(f'Skipping {path}: no steady state found within the time threshold')
        continue

    # Merge all dictionaries into a doc
    doc = {'Date': date, **meta_dict, **df.to_dict('list'), **_settled_statistics(df, j)}

    # Insert doc into database collection
    collection.insert_one(doc)