In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [108]:
# A Series built from mixed str/int/float values ends up with dtype object.
s = pd.Series(['banana', 42, 23, 19, "hola", 5.67])
s

0    banana
1        42
2        23
3        19
4      hola
5      5.67
dtype: object

In [112]:
# Build a labelled Series from a label -> value mapping: the keys become
# the index and the values become the data.
s1 = pd.Series({
    'tema 1': 'análisis de datos',
    'tema 2': 'visualización de datos',
    'tema 3': 'gráficas',
    'tema 4': 'proyecto',
})
# The index labels are rendered as the left-most column of the output.
s1

tema 1         análisis de datos
tema 2    visualización de datos
tema 3                  gráficas
tema 4                  proyecto
dtype: object

# ¿Cómo crear DataFrames?

In [111]:
# Build a DataFrame from a dict that maps each column name to its values.
cientificos = pd.DataFrame({
    'Nombre': ["Rosaline Franklin", "William Gosset"],
    'Ocupación': ["Químico", "Estadístico"],
    'Nacimiento': ['1920-07-25', '1876-06-13'],
    'Muerte': ['1958-04-16', '1937-10-16'],
    'Edad': [37, 61]
})
# NOTE: the alphabetical column order in the recorded output comes from an old
# pandas release; pandas >= 0.23 on Python >= 3.6 keeps the dict's insertion order.
cientificos

Unnamed: 0,Edad,Muerte,Nacimiento,Nombre,Ocupación
0,37,1958-04-16,1920-07-25,Rosaline Franklin,Químico
1,61,1937-10-16,1876-06-13,William Gosset,Estadístico


In [113]:
# Same kind of table, but with the scientists' names as row labels (index)
# and the column order stated explicitly through `columns`.
cientificos2 = pd.DataFrame(
    {
        "Ocupación": ["Químico", "Estadístico"],
        "Nacimiento": ["1920-07-25", "1876-06-13"],
        "Muerte": ["1958-04-16", "1937-10-16"],
        "Edad": [37, 61],
    },
    index=["Rosaline Franklin", "William Gosset"],
    columns=["Ocupación", "Nacimiento", "Muerte", "Edad"],
)
cientificos2

Unnamed: 0,Ocupación,Nacimiento,Muerte,Edad
Rosaline Franklin,Químico,1920-07-25,1958-04-16,37
William Gosset,Estadístico,1876-06-13,1937-10-16,61


In [115]:
# Load the data from a CSV file, using its "Name" column as the row index.
cientificos3 = pd.read_csv("Datos/scientists.csv", index_col = "Name")

cientificos3

Unnamed: 0_level_0,Born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
William Gosset,1876-06-13,1937-10-16,61,Statistician
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
Marie Curie,1867-11-07,1934-07-04,66,Chemist
Rachel Carson,1907-05-27,1964-04-14,56,Biologist
John Snow,1813-03-15,1858-06-16,45,Physician
Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


# Elementos de la Serie

In [118]:
fila1 = cientificos3.loc["Rosaline Franklin"]
# .loc looks the row up by its index label; the row comes back as a Series
# whose own index is the DataFrame's column names.
fila1

Born          1920-07-25
Died          1958-04-16
Age                   37
Occupation       Chemist
Name: Rosaline Franklin, dtype: object

In [119]:
# Labels of the row Series (the DataFrame's column names).
fila1.index

Index(['Born', 'Died', 'Age', 'Occupation'], dtype='object')

In [120]:
# keys() returns the same Index as the .index attribute.
fila1.keys()

Index(['Born', 'Died', 'Age', 'Occupation'], dtype='object')

In [121]:
# .values returns the row's data as a NumPy array (without the labels).
fila1.values

array(['1920-07-25', '1958-04-16', 37, 'Chemist'], dtype=object)

# Operaciones básicas sobre una Serie

In [123]:
# Selecting a single column returns a Series that keeps the "Name" index.
edades = cientificos3["Age"]
edades

Name
Rosaline Franklin       37
William Gosset          61
Florence Nightingale    90
Marie Curie             66
Rachel Carson           56
John Snow               45
Alan Turing             41
Johann Gauss            77
Name: Age, dtype: int64

In [38]:
# Arithmetic mean of the ages.
edades.mean()

59.125

In [39]:
# Largest age.
edades.max()

90

In [40]:
# Smallest age.
edades.min()

37

In [41]:
# Sample standard deviation (pandas divides by n - 1 by default).
edades.std()

18.325918413937288

# Series.describe()

In [43]:
# Count, mean, std, min, quartiles and max in a single call.
edades.describe()

count     8.000000
mean     59.125000
std      18.325918
min      37.000000
25%      44.000000
50%      58.500000
75%      68.750000
max      90.000000
Name: Age, dtype: float64

# Operaciones con Series

In [124]:
# Boolean mask: keep only the ages strictly greater than the mean age.
edades.loc[edades.gt(edades.mean())]

Name
William Gosset          61
Florence Nightingale    90
Marie Curie             66
Johann Gauss            77
Name: Age, dtype: int64

In [47]:
# A scalar operand is applied to every element of the Series.
edades + 100

Name
Rosaline Franklin       137
William Gosset          161
Florence Nightingale    190
Marie Curie             166
Rachel Carson           156
John Snow               145
Alan Turing             141
Johann Gauss            177
Name: Age, dtype: int64

In [48]:
# Scalar multiplication is element-wise as well.
edades * 2

Name
Rosaline Franklin        74
William Gosset          122
Florence Nightingale    180
Marie Curie             132
Rachel Carson           112
John Snow                90
Alan Turing              82
Johann Gauss            154
Name: Age, dtype: int64

In [49]:
# Two Series with the same index are added element by element.
edades + edades

Name
Rosaline Franklin        74
William Gosset          122
Florence Nightingale    180
Marie Curie             132
Rachel Carson           112
John Snow                90
Alan Turing              82
Johann Gauss            154
Name: Age, dtype: int64

In [50]:
# Element-wise product: each age is squared.
edades * edades

Name
Rosaline Franklin       1369
William Gosset          3721
Florence Nightingale    8100
Marie Curie             4356
Rachel Carson           3136
John Snow               2025
Alan Turing             1681
Johann Gauss            5929
Name: Age, dtype: int64

In [127]:
# Arithmetic aligns on index labels: labels present in both Series are summed,
# labels missing from either side yield NaN (so the result becomes float64).
edades + pd.Series({"Johann Gauss": 1, "John Snow": 99})

Alan Turing               NaN
Florence Nightingale      NaN
Johann Gauss             78.0
John Snow               144.0
Marie Curie               NaN
Rachel Carson             NaN
Rosaline Franklin         NaN
William Gosset            NaN
dtype: float64

In [58]:
# edades itself is unchanged: every operation above returned a new Series.
edades

Name
Rosaline Franklin       37
William Gosset          61
Florence Nightingale    90
Marie Curie             66
Rachel Carson           56
John Snow               45
Alan Turing             41
Johann Gauss            77
Name: Age, dtype: int64

In [129]:
# Sort by index label (the scientists' names) in descending order.
edades_ordenado = edades.sort_index(ascending = False)
edades_ordenado

Name
William Gosset          61
Rosaline Franklin       37
Rachel Carson           56
Marie Curie             66
John Snow               45
Johann Gauss            77
Florence Nightingale    90
Alan Turing             41
Name: Age, dtype: int64

In [61]:
# Addition matches rows by label, not by position, so the different row order
# of the two operands does not affect the sums.
edades + edades_ordenado

Name
Alan Turing              82
Florence Nightingale    180
Johann Gauss            154
John Snow                90
Marie Curie             132
Rachel Carson           112
Rosaline Franklin        74
William Gosset          122
Name: Age, dtype: int64

In [130]:
# The "Occupation" column as a Series of strings (dtype object).
ocupaciones = cientificos3["Occupation"]
ocupaciones

Name
Rosaline Franklin                  Chemist
William Gosset                Statistician
Florence Nightingale                 Nurse
Marie Curie                        Chemist
Rachel Carson                    Biologist
John Snow                        Physician
Alan Turing             Computer Scientist
Johann Gauss                 Mathematician
Name: Occupation, dtype: object

In [67]:
# "+" with a str appends it to every element (string concatenation).
ocupaciones + " Hola"

Name
Rosaline Franklin                  Chemist Hola
William Gosset                Statistician Hola
Florence Nightingale                 Nurse Hola
Marie Curie                        Chemist Hola
Rachel Carson                    Biologist Hola
John Snow                        Physician Hola
Alan Turing             Computer Scientist Hola
Johann Gauss                 Mathematician Hola
Name: Occupation, dtype: object

In [68]:
# "* 2" repeats each string twice.
ocupaciones * 2

Name
Rosaline Franklin                             ChemistChemist
William Gosset                      StatisticianStatistician
Florence Nightingale                              NurseNurse
Marie Curie                                   ChemistChemist
Rachel Carson                             BiologistBiologist
John Snow                                 PhysicianPhysician
Alan Turing             Computer ScientistComputer Scientist
Johann Gauss                      MathematicianMathematician
Name: Occupation, dtype: object

In [69]:
# Adding the Series to itself concatenates each string with itself.
ocupaciones + ocupaciones

Name
Rosaline Franklin                             ChemistChemist
William Gosset                      StatisticianStatistician
Florence Nightingale                              NurseNurse
Marie Curie                                   ChemistChemist
Rachel Carson                             BiologistBiologist
John Snow                                 PhysicianPhysician
Alan Turing             Computer ScientistComputer Scientist
Johann Gauss                      MathematicianMathematician
Name: Occupation, dtype: object

In [70]:
# str * str is undefined, so this multiplication raises TypeError. The error
# is the point of the cell: catch and print it so that "Restart & Run All"
# does not stop here.
try:
    ocupaciones * ocupaciones
except TypeError as err:
    print("TypeError:", err)

TypeError: can't multiply sequence by non-int of type 'str'

# Operaciones con DataFrames

In [72]:
# Display the full DataFrame again before operating on it.
cientificos3

Unnamed: 0_level_0,Born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
William Gosset,1876-06-13,1937-10-16,61,Statistician
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
Marie Curie,1867-11-07,1934-07-04,66,Chemist
Rachel Carson,1907-05-27,1964-04-14,56,Biologist
John Snow,1813-03-15,1858-06-16,45,Physician
Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [74]:
# Rows whose age is strictly greater than the mean age.
cientificos3.loc[cientificos3["Age"] > cientificos3["Age"].mean()]

Unnamed: 0_level_0,Born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
William Gosset,1876-06-13,1937-10-16,61,Statistician
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
Marie Curie,1867-11-07,1934-07-04,66,Chemist
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [75]:
# Adding an int to a DataFrame that contains string columns raises TypeError
# (str + int is undefined). The error is the point of the cell: catch and
# print it so that "Restart & Run All" does not stop here.
try:
    cientificos3 + 2
except TypeError as err:
    print("TypeError:", err)

TypeError: Could not operate 2 with block values must be str, not int

In [76]:
# Multiplying by 2 is applied to every cell: strings are repeated, while the
# numeric Age column is doubled.
cientificos3 * 2

Unnamed: 0_level_0,Born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rosaline Franklin,1920-07-251920-07-25,1958-04-161958-04-16,74,ChemistChemist
William Gosset,1876-06-131876-06-13,1937-10-161937-10-16,122,StatisticianStatistician
Florence Nightingale,1820-05-121820-05-12,1910-08-131910-08-13,180,NurseNurse
Marie Curie,1867-11-071867-11-07,1934-07-041934-07-04,132,ChemistChemist
Rachel Carson,1907-05-271907-05-27,1964-04-141964-04-14,112,BiologistBiologist
John Snow,1813-03-151813-03-15,1858-06-161858-06-16,90,PhysicianPhysician
Alan Turing,1912-06-231912-06-23,1954-06-071954-06-07,82,Computer ScientistComputer Scientist
Johann Gauss,1777-04-301777-04-30,1855-02-231855-02-23,154,MathematicianMathematician


In [131]:
# Split the frame in two by row position.
cientificos3a = cientificos3.iloc[:4]  # first four rows (positions 0-3)
cientificos3b = cientificos3.iloc[4:]  # rows from position 4 to the end

In [78]:
# First half: the first four rows.
cientificos3a

Unnamed: 0_level_0,Born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
William Gosset,1876-06-13,1937-10-16,61,Statistician
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
Marie Curie,1867-11-07,1934-07-04,66,Chemist


In [79]:
# Second half: the remaining four rows.
cientificos3b

Unnamed: 0_level_0,Born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rachel Carson,1907-05-27,1964-04-14,56,Biologist
John Snow,1813-03-15,1858-06-16,45,Physician
Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [80]:
# The two halves share no index labels, so label alignment finds no matching
# rows and every cell of the result is NaN (nothing is actually added).
cientificos3a + cientificos3b

Unnamed: 0_level_0,Born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alan Turing,,,,
Florence Nightingale,,,,
Johann Gauss,,,,
John Snow,,,,
Marie Curie,,,,
Rachel Carson,,,,
Rosaline Franklin,,,,
William Gosset,,,,


In [132]:
# Birth dates as read from the CSV: plain strings (dtype object), not dates yet.
nacimientos = cientificos3["Born"]
nacimientos

Name
Rosaline Franklin       1920-07-25
William Gosset          1876-06-13
Florence Nightingale    1820-05-12
Marie Curie             1867-11-07
Rachel Carson           1907-05-27
John Snow               1813-03-15
Alan Turing             1912-06-23
Johann Gauss            1777-04-30
Name: Born, dtype: object

In [136]:
# Parse the "YYYY-MM-DD" strings into real datetime64 values.
nac_fecha = pd.to_datetime(nacimientos, format="%Y-%m-%d")
nac_fecha

Name
Rosaline Franklin      1920-07-25
William Gosset         1876-06-13
Florence Nightingale   1820-05-12
Marie Curie            1867-11-07
Rachel Carson          1907-05-27
John Snow              1813-03-15
Alan Turing            1912-06-23
Johann Gauss           1777-04-30
Name: Born, dtype: datetime64[ns]

In [137]:
# Parse the "Died" strings ("YYYY-MM-DD") into real datetime64 values.
muerte_fecha = pd.to_datetime(cientificos3["Died"], format="%Y-%m-%d")
muerte_fecha

Name
Rosaline Franklin      1958-04-16
William Gosset         1937-10-16
Florence Nightingale   1910-08-13
Marie Curie            1934-07-04
Rachel Carson          1964-04-14
John Snow              1858-06-16
Alan Turing            1954-06-07
Johann Gauss           1855-02-23
Name: Died, dtype: datetime64[ns]

In [138]:
# Add the parsed dates as two new columns (this modifies cientificos3 in place).
cientificos3["born_date"] = nac_fecha
cientificos3["dead_date"] = muerte_fecha
cientificos3

Unnamed: 0_level_0,Born,Died,Age,Occupation,born_date,dead_date
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1920-07-25,1958-04-16
William Gosset,1876-06-13,1937-10-16,61,Statistician,1876-06-13,1937-10-16
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,1820-05-12,1910-08-13
Marie Curie,1867-11-07,1934-07-04,66,Chemist,1867-11-07,1934-07-04
Rachel Carson,1907-05-27,1964-04-14,56,Biologist,1907-05-27,1964-04-14
John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16
Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1912-06-23,1954-06-07
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,1777-04-30,1855-02-23


In [89]:
# Lifespan of each scientist: date of death minus date of birth.
edad_calculada = cientificos3["dead_date"] - cientificos3["born_date"]

In [139]:
# astype('timedelta64[Y]') is rejected by pandas >= 2.0 (only the s/ms/us/ns
# timedelta resolutions are supported), so convert explicitly instead:
# whole days floor-divided by the mean Gregorian year length (365.2425 days).
# The result is a float64 Series of completed years, like the old cast gave.
edad_calculada_y = edad_calculada.dt.days // 365.2425

In [143]:
cientificos3["edad_c"] = edad_calculada_y
# The computed ages are stored as a new "edad_c" column.
cientificos3

Unnamed: 0_level_0,Born,Died,Age,Occupation,born_date,dead_date,edad_c
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1920-07-25,1958-04-16,37.0
William Gosset,1876-06-13,1937-10-16,61,Statistician,1876-06-13,1937-10-16,61.0
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,1820-05-12,1910-08-13,90.0
Marie Curie,1867-11-07,1934-07-04,66,Chemist,1867-11-07,1934-07-04,66.0
Rachel Carson,1907-05-27,1964-04-14,56,Biologist,1907-05-27,1964-04-14,56.0
John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16,45.0
Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1912-06-23,1954-06-07,41.0
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,1777-04-30,1855-02-23,77.0


In [144]:
# Drop a row by its index label. drop() returns a new DataFrame;
# cientificos3 itself is not modified.
cientificos3.drop(index=["John Snow"])

Unnamed: 0_level_0,Born,Died,Age,Occupation,born_date,dead_date,edad_c
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1920-07-25,1958-04-16,37.0
William Gosset,1876-06-13,1937-10-16,61,Statistician,1876-06-13,1937-10-16,61.0
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,1820-05-12,1910-08-13,90.0
Marie Curie,1867-11-07,1934-07-04,66,Chemist,1867-11-07,1934-07-04,66.0
Rachel Carson,1907-05-27,1964-04-14,56,Biologist,1907-05-27,1964-04-14,56.0
Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1912-06-23,1954-06-07,41.0
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,1777-04-30,1855-02-23,77.0


In [146]:
# Drop columns by name. As with rows, the result is a new DataFrame and
# cientificos3 keeps all of its columns.
cientificos3.drop(columns=["Born", "Died"])

Unnamed: 0_level_0,Age,Occupation,born_date,dead_date,edad_c
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Rosaline Franklin,37,Chemist,1920-07-25,1958-04-16,37.0
William Gosset,61,Statistician,1876-06-13,1937-10-16,61.0
Florence Nightingale,90,Nurse,1820-05-12,1910-08-13,90.0
Marie Curie,66,Chemist,1867-11-07,1934-07-04,66.0
Rachel Carson,56,Biologist,1907-05-27,1964-04-14,56.0
John Snow,45,Physician,1813-03-15,1858-06-16,45.0
Alan Turing,41,Computer Scientist,1912-06-23,1954-06-07,41.0
Johann Gauss,77,Mathematician,1777-04-30,1855-02-23,77.0
