# Exploring Data Models and their Transformations

In [3]:
import numpy as np
import pandas as pd

# Structural Transformation: From Relations to Matrices and Back
- Matrix $\rightarrow$ Relational works.
- Relational $\rightarrow$ Matrix sometimes works!
- But how?

To start, let's take our matrix in `mm.txt`, and load it into Pandas.

In [4]:
mm = pd.read_csv('data/mm.txt', header=0)
mm

Unnamed: 0,Year,OCT,NOV,DEC,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP
0,2002,545.92,3115.08,3996.76,1815.74,1204.14,1644.02,795.92,540.24,112.62,79.52,22.2,171.7
1,2003,55.41,1242.23,2976.94,797.72,836.01,1026.11,1571.27,468.59,24.93,98.33,267.4,99.2
2,2004,55.9,834.4,2311.72,942.75,2019.22,399.52,339.18,251.64,72.38,55.57,116.74,97.48
3,2006,347.22,908.44,2981.16,1793.97,995.27,2031.19,1602.55,287.21,102.44,90.31,18.75,33.76
4,2005,1449.23,619.77,1789.93,1777.23,1055.41,1472.91,743.11,1113.26,309.19,46.61,86.51,93.98
5,2007,178.81,942.89,1279.33,320.66,1615.47,317.77,519.85,150.15,85.32,102.6,62.74,164.02
6,2008,612.92,329.64,1189.43,2153.17,1007.57,316.55,153.53,255.8,42.45,41.73,36.83,12.59
7,2009,272.27,777.81,1102.87,533.64,1479.8,881.3,297.8,580.28,320.84,35.95,53.25,42.99
8,2010,722.61,379.36,1029.03,1780.54,1136.93,814.83,1225.52,487.75,146.9,38.05,46.77,88.72
9,2011,1059.68,1016.81,2555.51,463.27,1059.94,2173.84,465.8,552.36,350.04,84.51,21.59,76.77


## What does an unpivot look like (Matrix -> Relational)?

In [5]:
mm_melted = mm.melt(id_vars=['Year'])
mm_melted

Unnamed: 0,Year,variable,value
0,2002,OCT,545.92
1,2003,OCT,55.41
2,2004,OCT,55.90
3,2006,OCT,347.22
4,2005,OCT,1449.23
...,...,...,...
223,2015,SEP,84.94
224,2016,SEP,52.88
225,2017,SEP,156.63
226,2018,SEP,14.06


Thanks to the `id_var` parameter, the `Year` column is named and repeated for all other (variable=column name, value=value) elements in the row.

In [4]:
mm_melted[mm_melted['Year'] == 2002]

Unnamed: 0,Year,variable,value
0,2002,OCT,545.92
19,2002,NOV,3115.08
38,2002,DEC,3996.76
57,2002,JAN,1815.74
76,2002,FEB,1204.14
95,2002,MAR,1644.02
114,2002,APR,795.92
133,2002,MAY,540.24
152,2002,JUN,112.62
171,2002,JUL,79.52


## PIVOT(UNPIVOT) = ??

In [6]:
mm_melted.pivot(index='variable', columns='Year')

Unnamed: 0_level_0,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value
Year,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
variable,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
APR,795.92,1571.27,339.18,743.11,1602.55,519.85,153.53,297.8,1225.52,465.8,828.51,259.37,376.71,343.53,427.64,825.07,578.42,442.48,509.5
AUG,22.2,267.4,116.74,86.51,18.75,62.74,36.83,53.25,46.77,21.59,92.7,96.91,163.17,49.52,28.19,72.88,22.84,48.34,25.99
DEC,3996.76,2976.94,2311.72,1789.93,2981.16,1279.33,1189.43,1102.87,1029.03,2555.51,156.69,1759.84,178.54,1654.38,1530.09,1155.19,118.69,585.14,1202.38
FEB,1204.14,836.01,2019.22,1055.41,995.27,1615.47,1007.57,1479.8,1136.93,1059.94,421.75,189.12,1314.08,860.72,367.08,2110.15,201.54,2261.94,70.34
JAN,1815.74,797.72,942.75,1777.23,1793.97,320.66,2153.17,533.64,1780.54,463.27,1066.21,338.4,233.12,149.87,1754.74,2628.17,847.21,1282.06,578.53
JUL,79.52,98.33,55.57,46.61,90.31,102.6,41.73,35.95,38.05,84.51,98.85,72.63,96.86,201.23,37.56,53.72,68.03,25.14,17.71
JUN,112.62,24.93,72.38,309.19,102.44,85.32,42.45,320.84,146.9,350.04,164.73,142.22,42.21,109.07,110.11,118.32,43.35,48.86,70.86
MAR,1644.02,1026.11,399.52,1472.91,2031.19,317.77,316.55,881.3,814.83,2173.84,1643.46,391.51,978.52,243.26,1395.37,751.9,1540.98,1090.67,789.14
MAY,540.24,468.59,251.64,1113.26,287.21,150.15,255.8,580.28,487.75,552.36,115.19,254.56,179.59,338.5,245.77,153.14,201.33,743.01,375.76
NOV,3115.08,1242.23,834.4,619.77,908.44,942.89,329.64,777.81,379.36,1016.81,523.53,1131.15,281.26,551.6,635.79,581.67,967.23,754.07,311.24


Oops, that was the transpose of the original; we can specifically pivot on Month instead:
<br/> <br/>

In [10]:
mm_back = mm_melted.pivot(index='Year', columns='variable')
mm_back

Unnamed: 0_level_0,value,value,value,value,value,value,value,value,value,value,value,value
variable,APR,AUG,DEC,FEB,JAN,JUL,JUN,MAR,MAY,NOV,OCT,SEP
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2002,795.92,22.2,3996.76,1204.14,1815.74,79.52,112.62,1644.02,540.24,3115.08,545.92,171.7
2003,1571.27,267.4,2976.94,836.01,797.72,98.33,24.93,1026.11,468.59,1242.23,55.41,99.2
2004,339.18,116.74,2311.72,2019.22,942.75,55.57,72.38,399.52,251.64,834.4,55.9,97.48
2005,743.11,86.51,1789.93,1055.41,1777.23,46.61,309.19,1472.91,1113.26,619.77,1449.23,93.98
2006,1602.55,18.75,2981.16,995.27,1793.97,90.31,102.44,2031.19,287.21,908.44,347.22,33.76
2007,519.85,62.74,1279.33,1615.47,320.66,102.6,85.32,317.77,150.15,942.89,178.81,164.02
2008,153.53,36.83,1189.43,1007.57,2153.17,41.73,42.45,316.55,255.8,329.64,612.92,12.59
2009,297.8,53.25,1102.87,1479.8,533.64,35.95,320.84,881.3,580.28,777.81,272.27,42.99
2010,1225.52,46.77,1029.03,1136.93,1780.54,38.05,146.9,814.83,487.75,379.36,722.61,88.72
2011,465.8,21.59,2555.51,1059.94,463.27,84.51,350.04,2173.84,552.36,1016.81,1059.68,76.77


<br/><br/>
This is basically the same as the original!

### [At Home] Extra Columns
Let's go back to `mmp.txt`. 
- Matrix or relation? 
- Try doing some PIVOT/UNPIVOT work on this.

In [13]:
mmp = pd.read_csv('data/mmp.txt', header=0)
mmp

Unnamed: 0,Year,ID,Location,Station,OCT,NOV,DEC,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP
0,2002,4BK,BROOKINGS,SOUTHERN OREGON COASTAL,12.86,29.06,34.64,34.64,18.20,12.10,13.24,7.30,7.36,0.04,0.06,2.90
1,2002,ASHO3,ASHLAND,SOUTHERN OREGON COASTAL,0.76,7.00,6.82,2.64,2.58,2.58,5.84,3.76,0.16,0.56,0.00,0.40
2,2002,COPO3,COPPER 4NE,SOUTHERN OREGON COASTAL,0.58,13.36,13.96,6.84,3.98,3.60,0.00,0.00,0.00,0.00,0.00,0.54
3,2002,CVJO3,CAVE JUNCTION,SOUTHERN OREGON COASTAL,4.92,27.20,29.62,19.52,12.92,9.26,3.88,1.78,0.00,0.00,0.00,0.66
4,2002,GOLO3,GOLD BEACH,SOUTHERN OREGON COASTAL,9.26,23.44,33.18,29.16,17.78,13.24,9.46,3.00,4.18,0.04,0.00,1.24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6203,2020,KENC1,KENTFIELD,RUSSIAN...NAPA...SAN FRANCISCO BAY,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
6204,2020,MHMC1,MOUNT HAMILTON,RUSSIAN...NAPA...SAN FRANCISCO BAY,0.08,1.65,6.36,3.12,0.00,3.80,2.94,1.55,0.00,0.00,0.02,0.00
6205,2020,NSHC1,NAPA STATE HOSPITAL,RUSSIAN...NAPA...SAN FRANCISCO BAY,0.00,0.96,5.21,2.09,0.00,1.59,1.11,2.92,0.00,0.00,0.06,0.00
6206,2020,OAMC1,OAKLAND MUSEUM,RUSSIAN...NAPA...SAN FRANCISCO BAY,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00


In [24]:
# Unpivot
mmp_melted = mmp.melt(id_vars=['Year','ID','Location', 'Station'])
mmp_melted

Unnamed: 0,Year,ID,Location,Station,variable,value
0,2002,4BK,BROOKINGS,SOUTHERN OREGON COASTAL,OCT,12.86
1,2002,ASHO3,ASHLAND,SOUTHERN OREGON COASTAL,OCT,0.76
2,2002,COPO3,COPPER 4NE,SOUTHERN OREGON COASTAL,OCT,0.58
3,2002,CVJO3,CAVE JUNCTION,SOUTHERN OREGON COASTAL,OCT,4.92
4,2002,GOLO3,GOLD BEACH,SOUTHERN OREGON COASTAL,OCT,9.26
...,...,...,...,...,...,...
74491,2020,KENC1,KENTFIELD,RUSSIAN...NAPA...SAN FRANCISCO BAY,SEP,0.00
74492,2020,MHMC1,MOUNT HAMILTON,RUSSIAN...NAPA...SAN FRANCISCO BAY,SEP,0.00
74493,2020,NSHC1,NAPA STATE HOSPITAL,RUSSIAN...NAPA...SAN FRANCISCO BAY,SEP,0.00
74494,2020,OAMC1,OAKLAND MUSEUM,RUSSIAN...NAPA...SAN FRANCISCO BAY,SEP,0.00


In [27]:
# Repivot the unpivot

mmp_tt = mmp_melted.pivot(index=['Year', 'ID', 'Location', 'Station'], columns='variable')
mmp_tt

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value,value,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,variable,APR,AUG,DEC,FEB,JAN,JUL,JUN,MAR,MAY,NOV,OCT,SEP
Year,ID,Location,Station,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2002,4BK,BROOKINGS,SOUTHERN OREGON COASTAL,13.24,0.06,34.64,18.20,34.64,0.04,7.36,12.10,7.30,29.06,12.86,2.90
2002,4LW,LAKEVIEW,SACRAMENTO...YUBA...FEATHER...AMERICAN,6.10,0.08,7.02,3.48,2.22,0.00,0.48,1.66,0.28,6.30,1.08,0.26
2002,ABYC1,AUBERRY,SAN JOAQUIN,1.92,0.00,14.20,2.26,4.38,0.00,0.20,4.66,1.66,6.88,1.80,0.00
2002,ADNC1,ADIN,SACRAMENTO...YUBA...FEATHER...AMERICAN,3.68,0.00,4.74,2.44,3.24,0.00,0.20,2.80,0.92,4.92,0.42,0.00
2002,ALTC1,ALTURAS,SACRAMENTO...YUBA...FEATHER...AMERICAN,2.50,0.00,3.72,3.40,1.58,0.06,0.00,0.90,0.46,5.22,0.54,0.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020,WVIC1,WATSONVILLE WATERWORKS,CENTRAL CALIFORNIA COASTAL,3.01,0.00,10.08,0.00,1.93,0.00,0.03,3.75,0.82,0.56,0.00,0.00
2020,YKAC1,YREKA,LOWER KLAMATH,0.73,0.08,2.02,0.03,2.08,0.11,0.27,0.96,2.03,0.66,0.43,0.02
2020,YNPC1,SOUTH ENTRANCE YOSEMITE,SAN JOAQUIN,3.93,0.04,2.88,0.44,1.32,0.01,0.15,6.66,2.38,1.66,0.09,0.00
2020,YPQC1,YOSEMITE PARK HEADQUARTERS,SAN JOAQUIN,3.89,0.02,4.60,0.00,1.37,0.08,0.10,4.58,1.98,2.00,0.00,0.00


In [26]:
mmp_tt.reset_index()

Unnamed: 0_level_0,Year,ID,Location,Station,value,value,value,value,value,value,value,value,value,value,value,value
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,APR,AUG,DEC,FEB,JAN,JUL,JUN,MAR,MAY,NOV,OCT,SEP
0,2002,4BK,BROOKINGS,SOUTHERN OREGON COASTAL,13.24,0.06,34.64,18.20,34.64,0.04,7.36,12.10,7.30,29.06,12.86,2.90
1,2002,4LW,LAKEVIEW,SACRAMENTO...YUBA...FEATHER...AMERICAN,6.10,0.08,7.02,3.48,2.22,0.00,0.48,1.66,0.28,6.30,1.08,0.26
2,2002,ABYC1,AUBERRY,SAN JOAQUIN,1.92,0.00,14.20,2.26,4.38,0.00,0.20,4.66,1.66,6.88,1.80,0.00
3,2002,ADNC1,ADIN,SACRAMENTO...YUBA...FEATHER...AMERICAN,3.68,0.00,4.74,2.44,3.24,0.00,0.20,2.80,0.92,4.92,0.42,0.00
4,2002,ALTC1,ALTURAS,SACRAMENTO...YUBA...FEATHER...AMERICAN,2.50,0.00,3.72,3.40,1.58,0.06,0.00,0.90,0.46,5.22,0.54,0.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6203,2020,WVIC1,WATSONVILLE WATERWORKS,CENTRAL CALIFORNIA COASTAL,3.01,0.00,10.08,0.00,1.93,0.00,0.03,3.75,0.82,0.56,0.00,0.00
6204,2020,YKAC1,YREKA,LOWER KLAMATH,0.73,0.08,2.02,0.03,2.08,0.11,0.27,0.96,2.03,0.66,0.43,0.02
6205,2020,YNPC1,SOUTH ENTRANCE YOSEMITE,SAN JOAQUIN,3.93,0.04,2.88,0.44,1.32,0.01,0.15,6.66,2.38,1.66,0.09,0.00
6206,2020,YPQC1,YOSEMITE PARK HEADQUARTERS,SAN JOAQUIN,3.89,0.02,4.60,0.00,1.37,0.08,0.10,4.58,1.98,2.00,0.00,0.00


# [At Home] Multisets to Sets

### Set up connections and schema

In [28]:
%reload_ext sql
%sql postgresql://127.0.0.1:5432/postgres
import pandas as pd

In [29]:
%%sql
DROP TABLE IF EXISTS blue;
DROP TABLE IF EXISTS red;
CREATE TABLE blue (last TEXT, first TEXT);
CREATE TABLE red (last TEXT, first TEXT);

INSERT INTO blue VALUES ('Wang', 'Daisy');
INSERT INTO blue VALUES ('Wang', 'Daisy');
INSERT INTO blue VALUES ('Wang', 'Xin');

INSERT INTO red VALUES ('Wang', 'Daisy');
INSERT INTO red VALUES ('Wang', 'Xin');
INSERT INTO red VALUES ('Wang', 'Xin');

SELECT * FROM blue;

last,first
Wang,Daisy
Wang,Daisy
Wang,Xin


In [31]:
%sql SELECT * FROM red;

last,first
Wang,Daisy
Wang,Xin
Wang,Xin


## Representing multiset relations as counted-set relations

### Use a CTAS statement with group by to convert standard tables to counted-set tables

In [32]:
%%sql
DROP TABLE IF EXISTS bluem;
CREATE TABLE bluem AS 
    SELECT *, COUNT(*) AS multiplicity
    FROM blue
    GROUP BY last, first;

SELECT * FROM bluem;

last,first,multiplicity
Wang,Xin,1
Wang,Daisy,2


In [33]:
%%sql
DROP TABLE IF EXISTS redm;
CREATE TABLE redm AS
    SELECT *, COUNT(*) AS multiplicity
    FROM red
    GROUP BY last, first;

SELECT * FROM redm;

last,first,multiplicity
Wang,Xin,2
Wang,Daisy,1


### How do we make selection on counted-set tables work like multisets?

This works exactly the same in both cases. There's nothing special here. Applying `WHERE` filters on a counted-set will always yield a set back, because you're only removing rows from a set. By definition, this cannot create an entity that is not a set. 

In [34]:
%%sql
-- sigma on multiset
SELECT * FROM blue 
WHERE first = 'Daisy';

last,first
Wang,Daisy
Wang,Daisy


In [35]:
%%sql
-- sigma on counted set
SELECT * FROM bluem WHERE first = 'Daisy';

last,first,multiplicity
Wang,Daisy,2


### What about projection?

We might want to be a bit careful here. See, what defines a set uniquely is its key, and in this case, the key is the combination of `(last, first)`. Simply having `last` or just having `first` is not enough to uniquely identify a row.

In [36]:
%%sql
-- pi on multiset
SELECT last FROM blue;

last
Wang
Wang
Wang


In fact, you can see that if you simply selected `last` from a counted-set, you'd get a multi-set as your output.

In [37]:
%%sql
SELECT last FROM bluem;

last
Wang
Wang


To convert this to a counted-set again, you need to sum up the multiplicities of the tuples that the `last` field came from.

In [38]:
%%sql
-- pi on counted set
SELECT last, SUM(multiplicity)
FROM bluem group by last;

last,sum
Wang,3


### What about cross-product?

In [39]:
%%sql
-- x on multiset
SELECT * FROM blue, red;

last,first,last_1,first_1
Wang,Daisy,Wang,Daisy
Wang,Daisy,Wang,Xin
Wang,Daisy,Wang,Xin
Wang,Daisy,Wang,Daisy
Wang,Daisy,Wang,Xin
Wang,Daisy,Wang,Xin
Wang,Xin,Wang,Daisy
Wang,Xin,Wang,Xin
Wang,Xin,Wang,Xin


Next, convert *the output* of a multiset cross-product to a counted set as we did before. This is our desired result:

In [40]:
%%sql
-- convert multiset x to counted set
WITH cte(blast, bfirst, rlast, rfirst)
    AS (SELECT * FROM blue, red)
SELECT *, COUNT(*)
  from cte
GROUP BY blast, bfirst, rlast, rfirst;

blast,bfirst,rlast,rfirst,count
Wang,Daisy,Wang,Daisy,2
Wang,Xin,Wang,Xin,2
Wang,Daisy,Wang,Xin,4
Wang,Xin,Wang,Daisy,1


Now, what went on in the arithmetic here? We can think this through by pushing the arithmetic into the query!

First, what do you get with naive cross-product of counted sets? You get the names from each table, along with the number of times that each name showed up in its respective table. So, for example, `('Wang', 'Xin')` showed up once in `blue` and twice in `red`.

In [41]:
%%sql
SELECT * FROM bluem, redm;

last,first,multiplicity,last_1,first_1,multiplicity_1
Wang,Xin,1,Wang,Xin,2
Wang,Xin,1,Wang,Daisy,1
Wang,Daisy,2,Wang,Xin,2
Wang,Daisy,2,Wang,Daisy,1


What does each row tell us individually? Each row tells us the number of times that the name from the left must be matched with the name from the right in the original cross product between `blue` and `red`. So if you multiply the multiplicities together, you'll get the number of instances of each ordered pair of names in the final cross product

In [42]:
%%sql
-- fix multiplicity per row
SELECT b.last, b.first, r.last, r.first, b.multiplicity*r.multiplicity
FROM bluem b, redm r;

last,first,last_1,first_1,?column?
Wang,Xin,Wang,Xin,2
Wang,Xin,Wang,Daisy,1
Wang,Daisy,Wang,Xin,4
Wang,Daisy,Wang,Daisy,2


If we simply wanted to drop duplicates instead of monitoring how many there were (the point of a counted-set), our life would have been a lot easier...

In [43]:
%%sql
SELECT DISTINCT b.last, b.first, r.last, r.first
FROM blue b, red r;

last,first,last_1,first_1
Wang,Daisy,Wang,Daisy
Wang,Xin,Wang,Xin
Wang,Daisy,Wang,Xin
Wang,Xin,Wang,Daisy


# [At Home] Transposing Demo Code

Just setting up my dataframe

In [44]:
%reload_ext sql
%sql postgresql://127.0.0.1:5432/postgres
import pandas as pd

In [45]:
%%sql
DROP TABLE IF EXISTS example;
CREATE TABLE example(name text, age integer, gpa float);
INSERT INTO example VALUES
       ('Patty Perfect', 22, 4.0),
       ('Sameer Soclose', 20, 3.99),
       ('Jacob Excellent', 21, 3.93);

<br/><br/>
Let's look at this dataframe and its data types <br/><br/>

In [46]:
df = %sql SELECT * FROM example;
df = df.DataFrame()

In [47]:
display(df)
display(df.dtypes)

Unnamed: 0,name,age,gpa
0,Patty Perfect,22,4.0
1,Sameer Soclose,20,3.99
2,Jacob Excellent,21,3.93


name     object
age       int64
gpa     float64
dtype: object

<br/><br/> Now what if we transpose; what are the data types then? 

In [48]:
dft = df.transpose()
display(dft)
display(dft.dtypes)

Unnamed: 0,0,1,2
name,Patty Perfect,Sameer Soclose,Jacob Excellent
age,22,20,21
gpa,4.0,3.99,3.93


0    object
1    object
2    object
dtype: object

<br/><br/>

What if we transpose the transpose?? We lose the data types! <br/><br/>

In [49]:
dftt = df.transpose().transpose()
display(dftt)
display(dftt.dtypes)

Unnamed: 0,name,age,gpa
0,Patty Perfect,22,4.0
1,Sameer Soclose,20,3.99
2,Jacob Excellent,21,3.93


name    object
age     object
gpa     object
dtype: object

<br/><br/>
But we can induce the types again if needed!
<br/><br/>

In [50]:
dftt['age'] = dftt['age'].astype(int)
dftt['gpa'] = dftt['gpa'].astype(float)

In [51]:
dftt.dtypes

name     object
age       int64
gpa     float64
dtype: object