-
Notifications
You must be signed in to change notification settings - Fork 1
/
a_09_ejemplos
112 lines (52 loc) · 1.71 KB
/
a_09_ejemplos
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 22 19:33:20 2019
@author: usrdel
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Location of the CSV file that holds the artwork data set.
directorio = "./data/artwork_data.csv"

# Small sample load: only the first five rows, restricted to the "id" and
# "artist" columns, with "id" used as the row index.
df_artwork = pd.read_csv(
    directorio,
    usecols=["id", "artist"],
    index_col="id",
    nrows=5,
)
# Files: text, PDF, HTML, XML, CSV, JSON
# Binary files
# Relational databases
# Subset of CSV columns to load for the full data set.
columnas = [
    "id",
    "artist",
    "title",
    "medium",
    "year",
    "acquisitionYear",
    "height",
    "width",
    "units",
]

# Full load: every row of the selected columns, indexed by "id".
df_completo = pd.read_csv(directorio, usecols=columnas, index_col="id")
# Round-trip the full frame through a pickle file: write it to disk, then
# load it back into a separate variable.
directorio_guardado = "./data/artwork_data.pickle"
df_completo.to_pickle(directorio_guardado)
df_completo_de_pickle = pd.read_pickle(directorio_guardado)
# Add a constant column: every row gets the value 0.
df_completo["Nueva Columna"] = 0

# Selecting a single column yields a pandas Series (despite the "df_" prefix).
df_artistas = df_completo["artist"]

# Distinct artist values; pd.unique returns an array, not a DataFrame.
artistas = pd.unique(df_artistas)

# Bare expressions below discard their results when run as a script.
len(df_completo.index)  # number of rows
len(artistas)  # number of distinct artists

# Occurrences of each artist, kept for later use.
df_artistas_vc = df_artistas.value_counts()

# Occurrences of each value of the "medium" column.
df_completo["medium"].value_counts()
# Group the rows of the full frame by the value of the "artist" column.
df_group_artista = df_completo.groupby("artist")
type(df_group_artista)  # bare expression; result is discarded in a script

# Iterating a GroupBy yields (group key, sub-DataFrame) pairs. Both prints
# use the loop variables, so both belong inside the (indented) loop body.
for name, group_fd in df_group_artista:
    print(name)
    print(group_fd)
# Exploratory look-ups; as bare expressions their results are discarded when
# the file runs as a script.
df_completo["acquisitionYear"].mean()  # mean of the acquisitionYear column
df_completo.tail(n=10)  # last ten rows
df_completo.loc[1035, "artist"]  # "artist" value of the row labelled 1035
df_completo.iloc[:100, :]  # first hundred rows by position, all columns
# Convert the dimension columns to numbers; values that cannot be parsed
# become NaN (errors="coerce").
#
# The columns are replaced with plain `df[col] = ...` assignment instead of
# `df.loc[:, col] = ...`: since pandas 2.0 the .loc form writes the new values
# into the existing column in place, so an object-dtype column would keep its
# object dtype even though it now holds floats.
df_completo["width"] = pd.to_numeric(df_completo["width"], errors="coerce")
df_completo["height"] = pd.to_numeric(df_completo["height"], errors="coerce")

# Element-wise product of the two converted columns; a NaN in either operand
# yields NaN for that row.
df_completo["area"] = df_completo["width"] * df_completo["height"]
# Plot the "height" column as a line.
plt.plot(df_completo["height"])

# Per-artist minimum, computed two equivalent ways. The aggregation is named
# with the string "min" rather than np.min: passing the NumPy callable is
# deprecated in pandas (FutureWarning since 2.1), and the string form is the
# documented way to keep the current behaviour.
df_group_artista.agg("min")
df_group_artista.min()