# Desafio 2 - Maratona Behind the Code 2021

## Bibliotecas

In [1]:
import os

import pandas as pd
from IPython.display import display

Se necessário, instale o cloudant:

In [None]:
# %pip install cloudant

## Obtenção dos dados de IoT

In [2]:
def get_df(file_name):
    """Load the dataset named ``file_name`` from the first available source.

    Sources are tried in this order:

    1. A local CSV file called ``file_name``.
    2. The IBM Cloud project assets (only when ``project_lib`` is importable),
       downloading from Cloudant and saving to the assets if the file is not
       there yet.
    3. Cloudant directly, writing the result to a local CSV for later runs.

    Args:
        file_name: Name of the CSV file, both locally and in the project assets.

    Returns:
        pandas.DataFrame: the data parsed by ``pd.read_csv``.
    """
    try:
        # Local run with the CSV already present.
        return pd.read_csv(file_name)
    except FileNotFoundError:
        try:
            # IBM Cloud run: go through the project assets.
            return _get_df_from_project(file_name)
        except ModuleNotFoundError:
            # Fall back to a direct download, written to disk and read back
            # so the frame goes through the same CSV parsing as the other paths.
            downloaded = get_from_cloudant()
            downloaded.to_csv(file_name, index=False)
            return pd.read_csv(file_name)


def _get_df_from_project(file_name):
    """Read ``file_name`` from the IBM Cloud project assets, creating it if absent."""
    from project_lib import Project

    project = Project(
        project_id=os.getenv("PROJECT_ID"),
        project_access_token=os.getenv("PROJECT_ACCESS_TOKEN"),
    )
    try:
        # Asset already stored in the project.
        return read_from_assets(project, file_name)
    except RuntimeError:
        # Asset missing: download it, store it, then read it back.
        downloaded = get_from_cloudant()
        project.save_data(file_name=file_name, data=downloaded.to_csv(index=False))
        return read_from_assets(project, file_name)


def read_from_assets(project, file_name):
    """Read the CSV asset ``file_name`` of ``project`` into a DataFrame.

    Args:
        project: Object exposing ``get_file(file_name)`` that returns a
            seekable file-like object.
        file_name: Name of the asset to fetch.

    Returns:
        pandas.DataFrame: the asset parsed by ``pd.read_csv``.
    """
    asset_buffer = project.get_file(file_name)
    # Rewind so parsing starts at the first byte of the buffer.
    asset_buffer.seek(0)
    return pd.read_csv(asset_buffer)


def get_from_cloudant():
    """Download the challenge's IoT readings from the Cloudant database.

    Credentials are read from the ``CLOUDANT_USER`` and ``CLOUDANT_PASSWORD``
    environment variables; the user name is also used as the account name.
    Every document of the ``quanam-iot`` database is fetched and reduced to
    the challenge columns. The client session is always closed before
    returning, including when the download fails.

    Returns:
        pandas.DataFrame: one row per document with the columns ``ID``,
        ``ILLUM``, ``HUMID``, ``CO2``, ``SOUND``, ``TEMP`` and ``RYTHM``.

    Raises:
        ModuleNotFoundError: if the ``cloudant`` package is not installed.
        KeyError: if a challenge column is missing from the fetched documents.
    """
    print("Getting data from cloudant...")
    # Imported lazily so the package is only required when a download
    # is actually performed.
    from cloudant import Cloudant

    CLOUDANT_USER = os.getenv("CLOUDANT_USER")
    CLOUDANT_PASSWORD = os.getenv("CLOUDANT_PASSWORD")
    client = Cloudant(
        CLOUDANT_USER,
        CLOUDANT_PASSWORD,
        account=CLOUDANT_USER,
        connect=True,
        auto_renew=True,
    )
    try:
        db = client["quanam-iot"]
        response = db.all_docs(include_docs=True)
        docs = [row["doc"] for row in response["rows"]]
    finally:
        # The client was opened with connect=True; end the session even if
        # the request above raised, so no connection is left open.
        client.disconnect()
    challenge_columns = ["ID", "ILLUM", "HUMID", "CO2", "SOUND", "TEMP", "RYTHM"]
    # Selecting by column list drops the extra document fields.
    return pd.DataFrame(docs)[challenge_columns].copy()

In [3]:
# Load the IoT readings: get_df reads the local "iot.csv" when it exists and
# otherwise falls back to the IBM Cloud project assets or a Cloudant download.
df = get_df("iot.csv")

## Exploração dos dados

In [4]:
# Cap the row display at 4 for this cell only, so the frame is rendered
# truncated instead of in full.
with pd.option_context("display.max_rows", 4):
    display(df)

Unnamed: 0,ID,ILLUM,HUMID,CO2,SOUND,TEMP,RYTHM
0,2408,347.16,64.54,539.34,21.01,21.11,74.67
1,2424,435.80,71.66,518.60,30.21,18.43,78.02
...,...,...,...,...,...,...,...
3198,2392,445.78,64.84,546.35,31.68,17.09,79.92
3199,2403,488.24,61.63,552.23,29.54,21.36,88.97


In [5]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3200 entries, 0 to 3199
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID      3200 non-null   int64  
 1   ILLUM   3200 non-null   float64
 2   HUMID   3200 non-null   float64
 3   CO2     3200 non-null   float64
 4   SOUND   3200 non-null   float64
 5   TEMP    3200 non-null   float64
 6   RYTHM   3200 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 175.1 KB


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

Unnamed: 0,ID,ILLUM,HUMID,CO2,SOUND,TEMP,RYTHM
count,3200.0,3200.0,3200.0,3200.0,3200.0,3200.0,3200.0
mean,1600.5,439.163413,67.086106,501.620666,29.840194,20.042578,80.156616
std,923.904757,89.915142,4.776214,42.951183,6.15871,1.648479,6.755854
min,1.0,145.48,51.86,352.82,8.06,14.76,55.37
25%,800.75,377.06,63.77,471.4275,25.55,18.92,75.6575
50%,1600.5,437.375,66.935,502.35,29.885,20.05,80.22
75%,2400.25,500.7925,70.33,530.1425,33.9325,21.18,84.66
max,3200.0,775.14,83.9,672.38,53.78,26.29,108.9
