In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the pipe-delimited agenda export. The file is decoded as ISO-8859-1
# and the 'Data' column is parsed into datetimes while reading.
df = pd.read_csv(
    'desafio-base1/agenda.csv',
    delimiter='|',
    quotechar='"',
    encoding='iso-8859-1',
    parse_dates=['Data'],
)
# Show the dtype pandas inferred for each column as a quick check of the parse.
print(df.dtypes)

Código                      int64
Data               datetime64[ns]
CodConvenio               float64
HoraConsulta               object
Tipo                       object
Valor                     float64
HoraAtendimento            object
HoraFim                    object
Usuário                    object
dtype: object


In [3]:
# Mapping from the source column headers to the field names used by the rest
# of the notebook. Only the labels change; column order and data are kept.
column_map = {
    # identifiers
    "Código": "physician_id",
    "Usuário": "patient_name",
    "CodConvenio": "healthinsurance_name",
    # scheduling
    "Data": "date",
    "HoraConsulta": "arrival_time",
    "HoraAtendimento": "start_time",
    "HoraFim": "end_time",
    # procedure
    "Tipo": "name",
    "Valor": "value",
}
df_renamed = df.rename(columns=column_map)

In [4]:
# The source has no arrival date, so only the time of day is kept in
# arrival_time (and likewise in start_time / end_time).
#
# Each column is parsed from the untouched raw frame `df` instead of from
# `df_renamed` itself. After the first run the df_renamed columns hold
# datetime.time objects, which pd.to_datetime(..., format=...) cannot parse,
# so parsing them in place would make this cell fail when re-run. Reading
# from `df` gives the same result on the first run and keeps the cell
# re-runnable. Missing times come out as NaT.
time_columns = {
    'arrival_time': 'HoraConsulta',
    'start_time': 'HoraAtendimento',
    'end_time': 'HoraFim',
}
for target_column, source_column in time_columns.items():
    df_renamed[target_column] = pd.to_datetime(
        df[source_column], format='%H:%M:%S'
    ).dt.time

In [5]:
# Preview the first 8 rows to check the renamed columns and the parsed times.
df_renamed.head(8)

Unnamed: 0,physician_id,date,healthinsurance_name,arrival_time,name,value,start_time,end_time,patient_name
0,91,2020-09-10,9.0,14:13:35,1ª vez,107.0,19:00:49,22:15:06,José
1,100,1997-06-01,13.0,NaT,Consulta,123.0,NaT,NaT,Teste 1
2,564,1999-09-10,2.0,18:31:24,Retorno,212.0,22:31:27,23:11:29,Teste 1
3,585,1995-06-13,,21:55:27,1ª vez,19.0,22:03:14,23:02:27,Teste 1
4,588,2015-11-20,12.0,11:13:07,,4.0,12:16:58,17:14:26,Teste 1
5,263,2006-09-04,9.0,20:53:21,1ª vez,,23:00:35,23:44:26,Teste 1
6,193,2016-06-13,,18:54:53,Consulta,48.0,20:25:36,22:17:12,Teste 1
7,155,1971-02-22,2.0,14:30:55,Consulta,,16:05:21,18:33:25,Teste 1


In [6]:
# Not sure yet whether this is worth doing: missing insurance codes are
# replaced with 0 so the column can be stored as integers instead of floats.
# Missing entries in 'value' are likewise filled with 0.
df_renamed = df_renamed.assign(
    healthinsurance_name=df_renamed['healthinsurance_name'].fillna(0).astype(int),
    value=df_renamed['value'].fillna(0),
)

In [7]:
# Derive a numeric patient id from the patient name: rows with the same name
# share an id. factorize() numbers distinct values from 0 in order of first
# appearance, so 1 is added to make the ids start at 1 instead of 0.
# NOTE(review): factorize() encodes missing names as -1 by default, so such
# rows end up with patient_id 0 - confirm this is acceptable downstream.
patient_codes = pd.factorize(df_renamed['patient_name'])[0]
df_renamed['patient_id'] = patient_codes + 1

In [8]:
# Open question: is 'cp' or 'sc' the right status to use here?
df_renamed['status'] = 'cp'

# Columns with no counterpart in the source data: each is added with NaN in
# every row, in the order listed.
for empty_column in (
    'patient_home_phone',
    'patient_mobile_phone',
    'description',
    'all_day',
    'cancel_reason',
    'patient_email',
    'event_blocked_scheduling',
    'quantity',
):
    df_renamed[empty_column] = np.nan

In [9]:
# Serialise each row's 'name' and 'value' to a JSON object and wrap it as
# 'json::[...]'. force_ascii=False is used only so accented characters are
# written as-is (readable UTF-8) rather than escaped; 'quantity' may be added
# to the payload later.
procedure_json = df_renamed[['name', 'value']].apply(
    lambda row: row.to_json(force_ascii=False),
    axis=1,
)
df_renamed['eventprocedure_pack'] = 'json::[' + procedure_json + ']'

In [10]:
# Preview the first 5 rows to check the added columns and the JSON payload.
df_renamed.head(5)

Unnamed: 0,physician_id,date,healthinsurance_name,arrival_time,name,value,start_time,end_time,patient_name,patient_id,status,patient_home_phone,patient_mobile_phone,description,all_day,cancel_reason,patient_email,event_blocked_scheduling,quantity,eventprocedure_pack
0,91,2020-09-10,9,14:13:35,1ª vez,107.0,19:00:49,22:15:06,José,1,cp,,,,,,,,,"json::[{""name"":""1ª vez"",""value"":107.0}]"
1,100,1997-06-01,13,NaT,Consulta,123.0,NaT,NaT,Teste 1,2,cp,,,,,,,,,"json::[{""name"":""Consulta"",""value"":123.0}]"
2,564,1999-09-10,2,18:31:24,Retorno,212.0,22:31:27,23:11:29,Teste 1,2,cp,,,,,,,,,"json::[{""name"":""Retorno"",""value"":212.0}]"
3,585,1995-06-13,0,21:55:27,1ª vez,19.0,22:03:14,23:02:27,Teste 1,2,cp,,,,,,,,,"json::[{""name"":""1ª vez"",""value"":19.0}]"
4,588,2015-11-20,12,11:13:07,,4.0,12:16:58,17:14:26,Teste 1,2,cp,,,,,,,,,"json::[{""name"":null,""value"":4.0}]"


In [11]:
# Select the columns for the export, in output order. The working columns
# 'name' and 'value' are not selected (their content is carried inside
# 'eventprocedure_pack'), and 'quantity' is not selected either.
df_rearanged = df_renamed.loc[
    :,
    [
        # patient / physician
        "patient_id",
        "patient_name",
        "physician_id",
        # scheduling
        "date",
        "status",
        "patient_home_phone",
        "patient_mobile_phone",
        "arrival_time",
        "start_time",
        "end_time",
        # event details
        "description",
        "all_day",
        "cancel_reason",
        "patient_email",
        "event_blocked_scheduling",
        "healthinsurance_name",
        "eventprocedure_pack",
    ],
]

In [12]:
# Write the selected columns to a UTF-8 CSV file, without the index column.
df_rearanged.to_csv(
    'event_scheduling.csv',
    encoding='utf-8',
    index=False,
)
