# Regimientos

In [6]:
import pandas as pd
import sqlite3

### Paso 2. Crear el DataFrame con los siguientes valores:

In [7]:
raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], 
        'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], 
        'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], 
        'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],
        'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}

### Paso 3. Asignar a una variable llamada regimiento.


## PANDAS

In [8]:
regimiento = pd.DataFrame(data=raw_data)
regimiento

Unnamed: 0,regiment,company,name,preTestScore,postTestScore
0,Nighthawks,1st,Miller,4,25
1,Nighthawks,1st,Jacobson,24,94
2,Nighthawks,2nd,Ali,31,57
3,Nighthawks,2nd,Milner,2,62
4,Dragoons,1st,Cooze,3,70
5,Dragoons,1st,Jacon,4,25
6,Dragoons,2nd,Ryaner,24,94
7,Dragoons,2nd,Sone,31,57
8,Scouts,1st,Sloan,2,62
9,Scouts,1st,Piger,3,70


## SQL

In [9]:
#SQL
# Nos conectamos y creamos la db
conexion = sqlite3.connect('Regiment.db')

# Escribimos DataFrame en una SQLite tabla
regimiento.to_sql('regimiento', conexion, index=False, if_exists='replace')

# Obtenemos un cursor que utilizaremos para hacer las queries
crsr = conexion.cursor()

In [20]:
res = crsr.execute("SELECT name FROM sqlite_master WHERE type='table'")
for name in res:
    print(name[0])

regimiento


In [19]:
# Con esta función leemos los datos y lo pasamos a un DataFrame de Pandas
def sql_query(query):

    # Ejecuta la query
    crsr.execute(query)

    # Almacena los datos de la query 
    ans = crsr.fetchall()

    # Obtenemos los nombres de las columnas de la tabla
    names = [description[0] for description in crsr.description]

    return pd.DataFrame(ans,columns=names)

In [21]:
#SQL
query = '''
SELECT *
FROM regimiento
'''

df = sql_query(query)
df

Unnamed: 0,regiment,company,name,preTestScore,postTestScore
0,Nighthawks,1st,Miller,4,25
1,Nighthawks,1st,Jacobson,24,94
2,Nighthawks,2nd,Ali,31,57
3,Nighthawks,2nd,Milner,2,62
4,Dragoons,1st,Cooze,3,70
5,Dragoons,1st,Jacon,4,25
6,Dragoons,2nd,Ryaner,24,94
7,Dragoons,2nd,Sone,31,57
8,Scouts,1st,Sloan,2,62
9,Scouts,1st,Piger,3,70


### Paso 4. ¿Cuál es la puntuación media de preTestScore del regimiento Nighthawks?  

In [10]:
Nighthawks = regimiento[regimiento['regiment'] == 'Nighthawks']
Nighthawks

Unnamed: 0,regiment,company,name,preTestScore,postTestScore
0,Nighthawks,1st,Miller,4,25
1,Nighthawks,1st,Jacobson,24,94
2,Nighthawks,2nd,Ali,31,57
3,Nighthawks,2nd,Milner,2,62


In [11]:
pretestScore_mean = Nighthawks.groupby('regiment')['preTestScore'].mean()
pretestScore_mean

regiment
Nighthawks    15.25
Name: preTestScore, dtype: float64

In [25]:
#SQL
query = '''
SELECT avg (preTestScore) as media_Pre_Test_Score
FROM regimiento
where regiment = "Nighthawks"
'''

df = sql_query(query)
df

Unnamed: 0,media_Pre_Test_Score
0,15.25



### Paso 5. Presentar las estadísticas generales por compañía


In [12]:
regimiento.name.describe()

count      12
unique     11
top       Ali
freq        2
Name: name, dtype: object

In [26]:
regimiento.company.describe()

count      12
unique      2
top       1st
freq        6
Name: company, dtype: object

In [27]:
#SQL
query = '''
SELECT count(), COUNT (DISTINCT name)
FROM regimiento
'''

df = sql_query(query)
df

Unnamed: 0,count(),COUNT (DISTINCT name)
0,12,11


### Paso 6. ¿Cuál es la media de las puntuaciones preTestScore de cada empresa?


In [13]:
pretestScore_mean_company = regimiento.groupby(['name'])['preTestScore'].mean()
pretestScore_mean_company

name
Ali         17.0
Cooze        3.0
Jacobson    24.0
Jacon        4.0
Miller       4.0
Milner       2.0
Piger        3.0
Riani        2.0
Ryaner      24.0
Sloan        2.0
Sone        31.0
Name: preTestScore, dtype: float64

In [29]:
#SQL
query = '''
SELECT avg(preTestScore) as media
FROM regimiento
GROUP BY name
'''

df = sql_query(query)
df

Unnamed: 0,media
0,17.0
1,3.0
2,24.0
3,4.0
4,4.0
5,2.0
6,3.0
7,2.0
8,24.0
9,2.0


### Paso 7. Muestra las puntuaciones medias de preTestScore agrupadas por regimiento y compañía

In [14]:
pretestScore_mean_company_regiment = regimiento.groupby(['regiment', 'company'])['preTestScore'].mean()
pretestScore_mean_company_regiment

regiment    company
Dragoons    1st         3.5
            2nd        27.5
Nighthawks  1st        14.0
            2nd        16.5
Scouts      1st         2.5
            2nd         2.5
Name: preTestScore, dtype: float64

### Paso 8. Presentar la media de las puntuaciones preTestScores agrupadas por regimiento y compañía sin usar indexación jerárquica


In [15]:
mean_preTestScores = regimiento.groupby(['regiment', 'company'])['preTestScore'].mean().reset_index()
mean_preTestScores

Unnamed: 0,regiment,company,preTestScore
0,Dragoons,1st,3.5
1,Dragoons,2nd,27.5
2,Nighthawks,1st,14.0
3,Nighthawks,2nd,16.5
4,Scouts,1st,2.5
5,Scouts,2nd,2.5


In [41]:
#SQL
query = '''
SELECT regiment as regimiento, company as compañia, avg(preTestScore) as media
FROM regimiento
GROUP BY regiment, company
'''

df = sql_query(query)
df

Unnamed: 0,regimiento,compañia,media
0,Dragoons,1st,3.5
1,Dragoons,2nd,27.5
2,Nighthawks,1st,14.0
3,Nighthawks,2nd,16.5
4,Scouts,1st,2.5
5,Scouts,2nd,2.5


### Paso 9. Agrupar todo el marco de datos por regimiento y compañía

In [34]:
data = regimiento.groupby(['regiment', 'company']).describe()
data

Unnamed: 0_level_0,Unnamed: 1_level_0,preTestScore,preTestScore,preTestScore,preTestScore,preTestScore,preTestScore,preTestScore,preTestScore,postTestScore,postTestScore,postTestScore,postTestScore,postTestScore,postTestScore,postTestScore,postTestScore
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
regiment,company,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Dragoons,1st,2.0,3.5,0.707107,3.0,3.25,3.5,3.75,4.0,2.0,47.5,31.819805,25.0,36.25,47.5,58.75,70.0
Dragoons,2nd,2.0,27.5,4.949747,24.0,25.75,27.5,29.25,31.0,2.0,75.5,26.162951,57.0,66.25,75.5,84.75,94.0
Nighthawks,1st,2.0,14.0,14.142136,4.0,9.0,14.0,19.0,24.0,2.0,59.5,48.790368,25.0,42.25,59.5,76.75,94.0
Nighthawks,2nd,2.0,16.5,20.506097,2.0,9.25,16.5,23.75,31.0,2.0,59.5,3.535534,57.0,58.25,59.5,60.75,62.0
Scouts,1st,2.0,2.5,0.707107,2.0,2.25,2.5,2.75,3.0,2.0,66.0,5.656854,62.0,64.0,66.0,68.0,70.0
Scouts,2nd,2.0,2.5,0.707107,2.0,2.25,2.5,2.75,3.0,2.0,66.0,5.656854,62.0,64.0,66.0,68.0,70.0


In [45]:
#SQL
query = '''
SELECT regiment as regimiento, company as compañia, 
count(preTestScore) as countPre, 
avg(preTestScore) as mediaPre, 
max(preTestScore) as maxPre, 
min(preTestScore) as minPre,
count(postTestScore) as countPost, 
avg(postTestScore) as mediaPost,
max(postTestScore) as maxPost, 
min(preTestScore) as minPost

FROM regimiento
GROUP BY regiment, company
'''

df = sql_query(query)
df

Unnamed: 0,regimiento,compañia,countPre,mediaPre,maxPre,minPre,countPost,mediaPost,maxPost,minPost
0,Dragoons,1st,2,3.5,4,3,2,47.5,70,3
1,Dragoons,2nd,2,27.5,31,24,2,75.5,94,24
2,Nighthawks,1st,2,14.0,24,4,2,59.5,94,4
3,Nighthawks,2nd,2,16.5,31,2,2,59.5,62,2
4,Scouts,1st,2,2.5,3,2,2,66.0,70,2
5,Scouts,2nd,2,2.5,3,2,2,66.0,70,2


### Paso 10. Cuál es el número de observaciones en cada regimiento y compañía

In [38]:
obervaciones = regimiento.groupby(['regiment', 'company']).size()
obervaciones

regiment    company
Dragoons    1st        2
            2nd        2
Nighthawks  1st        2
            2nd        2
Scouts      1st        2
            2nd        2
dtype: int64

In [40]:
#SQL
query = '''
SELECT regiment, company, count(*) as conteo
FROM regimiento
GROUP BY regiment, company
'''

df = sql_query(query)
df

Unnamed: 0,regiment,company,conteo
0,Dragoons,1st,2
1,Dragoons,2nd,2
2,Nighthawks,1st,2
3,Nighthawks,2nd,2
4,Scouts,1st,2
5,Scouts,2nd,2


### Paso 11. Iterar sobre un grupo e imprimir el nombre y los datos completos del regimiento. Haz una función llamada agrupa(variable)

In [18]:
def agrupa(variable):
    grouped_data = regimiento.groupby(variable)
    
    for nombre, datos in grouped_data:
        print(f"Nombre del {variable}: {nombre}")
        print(datos)
        print("\n")

# Llamada a la función con la columna del dataframe que queramos
agrupa('company')


Nombre del company: 1st
     regiment company      name  preTestScore  postTestScore
0  Nighthawks     1st    Miller             4             25
1  Nighthawks     1st  Jacobson            24             94
4    Dragoons     1st     Cooze             3             70
5    Dragoons     1st     Jacon             4             25
8      Scouts     1st     Sloan             2             62
9      Scouts     1st     Piger             3             70


Nombre del company: 2nd
      regiment company    name  preTestScore  postTestScore
2   Nighthawks     2nd     Ali            31             57
3   Nighthawks     2nd  Milner             2             62
6     Dragoons     2nd  Ryaner            24             94
7     Dragoons     2nd    Sone            31             57
10      Scouts     2nd   Riani             2             62
11      Scouts     2nd     Ali             3             70


