# Oversikt over datasettene

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Four-entry hex colour palettes, one per quantity named in the variable.
# Each list appears to run from the darkest to the lightest shade.
# NOTE(review): presumably consumed by plotting cells further down - confirm.
rsrp_colors = [
    '#384959',
    '#6A89A7',
    '#88BDF2',
    '#BDDDFC',
]
rsrq_colors = [
    '#614419',
    '#B37E2E',
    '#DB9A39',
    '#FFB343',
]
sinr_colors = [
    '#135E4B',
    '#4CB572',
    '#A1D8B5',
    '#CCDCDB',
]
mode_colors = [
    '#872323',
    '#C93636',
    '#E26666',
    '#F4B6B6',
]

In [4]:
def read_dataset(filename, campaigns=None, features=None):
    '''
    Read a measurement CSV file and return ONE filtered pandas DataFrame.

    Processing steps, in order:
        1. Columns whose name starts with 'Unnamed' or 'Timestamp' are dropped.
        2. Every 'DC' value is replaced by 1 and every 'LTE' value by 0
           (the replacement is applied to the whole frame, not only 'Mode').
        3. Only rows whose 'campaign' contains 'Driving' or 'Walking' are kept;
           rows with a missing campaign are discarded.
        4. If `campaigns` is given, only rows whose 'campaign' is one of the
           listed values are kept.
        5. The requested feature columns are selected, always followed by
           'Mode', 'Latitude' and 'Longitude'.

    Parameters
    ----------
    filename : str or path-like
        Passed straight to `pd.read_csv`.
    campaigns : iterable of str, optional
        Exact campaign names to keep. `None` (default) keeps every
        Driving/Walking campaign.
    features : sequence of str, optional
        Columns to return. `None` (default) selects
        RSRP, SINR, RSRQ, SSS_RSRP, SSS_SINR, SSS_RSRQ and campaign.
        The sequence passed in is never modified.

    Returns
    -------
    pandas.DataFrame
        The selected columns for the rows that survived the filters.

    Raises
    ------
    KeyError
        If 'campaign' or any selected column is absent from the file.
    '''
    df = pd.read_csv(filename)

    # str.match anchors at the start of the name, so one alternation covers
    # both prefixes that have to be discarded.
    df = df.loc[:, ~df.columns.str.match('Unnamed|Timestamp')]

    df = df.replace('DC', 1)
    df = df.replace('LTE', 0)

    # na=False: a missing campaign would otherwise put NaN into the mask and
    # make the boolean indexing fail.
    df = df[df['campaign'].str.contains('Driving|Walking', na=False)]

    if campaigns is not None:
        df = df[df['campaign'].isin(campaigns)]

    if features is None:
        features = ['RSRP', 'SINR', 'RSRQ', 'SSS_RSRP', 'SSS_SINR', 'SSS_RSRQ', 'campaign']

    # Work on a copy so the caller's list is left untouched, and only add the
    # mandatory columns that are not already requested (avoids duplicates).
    selected = list(features)
    for required in ('Mode', 'Latitude', 'Longitude'):
        if required not in selected:
            selected.append(required)

    return df[selected]

In [12]:
# Load the merged measurement file of each operator.
op1_df = read_dataset('datasets/Op1_merged.csv')
op2_df = read_dataset('datasets/Op2_merged.csv')

# The 'Mode' column holds 1 where the file said 'DC' and 0 where it said
# 'LTE'; the report below labels those shares 5G and 4G respectively.
op1_mode = op1_df['Mode']
op2_mode = op2_df['Mode']

# One report per operator: number of datapoints and the percentage split.
reports = [
    f'{label} datapoint: {len(mode)}\n'
    f'5G in {label} ≈ {np.sum(mode) / len(mode) * 100:.1f}\n'
    f'4G in {label} ≈ {(1 - np.sum(mode) / len(mode)) * 100:.1f}'
    for label, mode in (('OP1', op1_mode), ('OP2', op2_mode))
]
# A blank line separates the two operator reports.
print('\n\n'.join(reports))

OP1 datapoint: 72540
5G in OP1 ≈ 33.0
4G in OP1 ≈ 67.0

OP2 datapoint: 100099
5G in OP2 ≈ 29.5
4G in OP2 ≈ 70.5
