# Lectura de un CSV a un DataFrame

## Avisos Ciudadanos

![Open Data](images/Datos-Abiertos.png)

http://bit.ly/2mEQRxE

In [5]:
import pandas as pd
from io import BytesIO, StringIO
import requests

# Source CSV; fields are semicolon-separated (see `delimiter` below).
url="http://datos.madrid.es/egob/catalogo/212411-16-madrid-avisa.csv"

# Fail fast on an HTTP error instead of parsing an error page as CSV, and
# bound the wait so an unresponsive server cannot hang the notebook.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Hand pandas the raw bytes so that `encoding` is actually applied; with a
# StringIO of already-decoded text the argument has no effect.
data = BytesIO(response.content)

df = pd.read_csv(data, delimiter=";", encoding = "ISO-8859-1")

# Take a sample (seeded, so re-running the notebook selects the same rows)
df = df.sample(1000, random_state=0)

In [6]:
# Summarize the sampled frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 59084 to 355246
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   TIPO_INCIDENCIA_ID    1000 non-null   int64  
 1   TIPO_INCIDENCIA       1000 non-null   object 
 2   CANAL_DE_ENTRADA_ID   154 non-null    float64
 3   CANAL_DE_ENTRADA      1000 non-null   object 
 4   FECHA_DE_RECEPCION    1000 non-null   object 
 5   HORA_DE_RECEPCION     1000 non-null   object 
 6   SECCION_ID            1000 non-null   int64  
 7   SECCION               1000 non-null   object 
 8   ANOMALIA_ID           1000 non-null   int64  
 9   ANOMALIA              1000 non-null   object 
 10  TIPO_DE_VIAL_ID       1000 non-null   int64  
 11  TIPO_DE_VIAL          1000 non-null   object 
 12  NOMBRE_DE_VIAL        1000 non-null   object 
 13  NUMERO                1000 non-null   int64  
 14  CALIFICADOR           1000 non-null   object 
 15  DISTRITO_ID    

In [7]:
from sqlalchemy import create_engine

# Creación de una base de datos en Postgres

In [8]:
# Define IPython aliases that run the `dropdb` / `createdb` commands inside the
# `postgres_container` Docker container, passing `-U postgres` as the user.
%alias dropdb docker exec -i postgres_container dropdb -U postgres 
%alias createdb docker exec -i postgres_container createdb -U postgres 

In [9]:
dropdb avisa

dropdb: error: database removal failed: ERROR:  database "avisa" does not exist


In [10]:
createdb avisa

In [11]:
# Load the `sql` IPython extension; the %sql / %%sql magics are used after this point.
%load_ext sql

In [12]:
# Point the %sql / %%sql magics at the `avisa` database on localhost.
# NOTE(review): user and password are hardcoded in the URL — consider reading
# them from the environment before sharing this notebook.
%sql postgresql://postgres:postgres@localhost/avisa

# Exportación de un DataFrame a una tabla en Postgres

In [13]:
# SQLAlchemy engine for the `avisa` database on localhost, port 5432.
# NOTE(review): user and password are hardcoded in the URL — consider reading
# them from the environment before sharing this notebook.
engine = create_engine('postgresql://postgres:postgres@localhost:5432/avisa')

# Write the sampled frame to table `incidencias`. if_exists="replace" makes the
# cell re-runnable: the default ("fail") raises ValueError once the table exists.
df.to_sql('incidencias', engine, if_exists="replace")

In [14]:
%%sql
SELECT COUNT(*)
FROM incidencias

 * postgresql://postgres:***@localhost/avisa
1 rows affected.


count
1000


In [15]:
# Store the distinct (TIPO_INCIDENCIA_ID, TIPO_INCIDENCIA) pairs in their own table.
# if_exists="replace" keeps the cell re-runnable; the default ("fail") raises
# ValueError when `tipo_incidencias` already exists.
(
    df
    .filter(["TIPO_INCIDENCIA_ID", "TIPO_INCIDENCIA"])
    .drop_duplicates()
    .to_sql('tipo_incidencias', engine, if_exists="replace")
)

In [16]:
%%sql
SELECT *
FROM tipo_incidencias

 * postgresql://postgres:***@localhost/avisa
3 rows affected.


index,TIPO_INCIDENCIA_ID,TIPO_INCIDENCIA
59084,0,AVISO
233244,1,Peticion
107679,2,No conforme con resolucion


# Exportación de un DataFrame a un JSON

In [17]:
# Serialize the distinct incident-type pairs as a JSON array with one object per row.
json_string = (
    df
    .filter(["TIPO_INCIDENCIA_ID", "TIPO_INCIDENCIA"])
    .drop_duplicates()
    .to_json(orient='records')
)
json_string

'[{"TIPO_INCIDENCIA_ID":0,"TIPO_INCIDENCIA":"AVISO"},{"TIPO_INCIDENCIA_ID":1,"TIPO_INCIDENCIA":"Peticion"},{"TIPO_INCIDENCIA_ID":2,"TIPO_INCIDENCIA":"No conforme con resolucion"}]'

In [18]:
import json

# Parse the JSON text back into native Python objects and display the result.
json_list = json.loads(json_string)
json_list

[{'TIPO_INCIDENCIA_ID': 0, 'TIPO_INCIDENCIA': 'AVISO'},
 {'TIPO_INCIDENCIA_ID': 1, 'TIPO_INCIDENCIA': 'Peticion'},
 {'TIPO_INCIDENCIA_ID': 2, 'TIPO_INCIDENCIA': 'No conforme con resolucion'}]

In [19]:
# Print each parsed record on its own line. The loop variable is deliberately
# not called `json`: that would rebind the name of the imported `json` module
# and break any later `json.loads(...)` call in the same kernel.
for record in json_list:
    print(record)

{'TIPO_INCIDENCIA_ID': 0, 'TIPO_INCIDENCIA': 'AVISO'}
{'TIPO_INCIDENCIA_ID': 1, 'TIPO_INCIDENCIA': 'Peticion'}
{'TIPO_INCIDENCIA_ID': 2, 'TIPO_INCIDENCIA': 'No conforme con resolucion'}


# Importación de un JSON a un DataFrame

In [20]:
# Wrap the text in StringIO: pandas deprecates passing a literal JSON string to
# read_json (FutureWarning since pandas 2.1) in favour of a file-like object.
pd.read_json(StringIO(json_string))

Unnamed: 0,TIPO_INCIDENCIA_ID,TIPO_INCIDENCIA
0,0,AVISO
1,1,Peticion
2,2,No conforme con resolucion


In [21]:
# read_json also accepts a URL: fetch five issues from the GitHub API and show the first rows.
# NOTE(review): the recorded output lists pandas-dev/pandas URLs, so this
# pydata/pandas address appears to be redirected — consider updating it.
pd.read_json('https://api.github.com/repos/pydata/pandas/issues?per_page=5').head()

Unnamed: 0,url,repository_url,labels_url,comments_url,events_url,html_url,id,node_id,number,title,...,milestone,comments,created_at,updated_at,closed_at,author_association,active_lock_reason,pull_request,body,performed_via_github_app
0,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/43654,1000191498,PR_kwDOAA0YD84r7br0,43654,PERF: sparse take,...,,0,2021-09-19 03:02:14+00:00,2021-09-19 03:05:21+00:00,NaT,MEMBER,,{'url': 'https://api.github.com/repos/pandas-d...,This helps some on #41023 because `__getitem__...,
1,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/43653,1000175393,PR_kwDOAA0YD84r7Za8,43653,REF: ExtensionIndex.searchsorted -> IndexOpsMi...,...,,0,2021-09-19 01:18:48+00:00,2021-09-19 01:18:48+00:00,NaT,MEMBER,,{'url': 'https://api.github.com/repos/pandas-d...,Perf-neutral on the case that motivated implem...,
2,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/issues/43652,1000157199,I_kwDOAA0YD847nTAP,43652,CI: codecov for pyx files,...,{'url': 'https://api.github.com/repos/pandas-d...,0,2021-09-18 23:23:10+00:00,2021-09-18 23:46:25+00:00,NaT,CONTRIBUTOR,,,#### Is your feature request related to a prob...,
3,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/43651,1000139026,PR_kwDOAA0YD84r7URj,43651,REF: remove JoinUnit.shape,...,,0,2021-09-18 21:31:25+00:00,2021-09-18 21:31:25+00:00,NaT,MEMBER,,{'url': 'https://api.github.com/repos/pandas-d...,- [ ] closes #xxxx\r\n- [ ] tests added / pass...,
4,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/issues/43650,1000133222,I_kwDOAA0YD847nNJm,43650,CI/BUG: pyarrow read_csv deadlock,...,,0,2021-09-18 21:00:27+00:00,2021-09-18 21:00:40+00:00,NaT,MEMBER,,,"xref #43611, #43643\r\n\r\nWhen trying to figu...",
