In [1]:
%pip install scipy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd

## Round 3: Converting To and From Different Data Formats

In [3]:
df = pd.read_csv('../data/MentalRotation.csv')
df.head()

Unnamed: 0,Subject,Trial,Angle,Matching,Response,Time,Correct,Age,Sex
0,49,1,0,0,n,3107,1,32,M
1,49,2,150,0,n,2930,1,32,M
2,49,3,150,1,b,1874,1,32,M
3,49,4,100,1,b,3793,1,32,M
4,49,5,50,1,b,2184,1,32,M


## .mat files

In [10]:
import os
import scipy.io

In [7]:
scipy.io.whosmat
scipy.io.savemat
scipy.io.loadmat
scipy.io.matlab.matfile_version

<function scipy.io.matlab._miobase.matfile_version(file_name, *, appendmat=True)>

In [11]:
os.makedirs('../data/alternates', exist_ok=True)

## Roundtrip tests: Saving and Loading Data to see what comes out

Can you just stick a dataframe in a mat file and expect it to do something?

In [22]:
scipy.io.savemat('../data/alternates/mental_rot.mat', df)
scipy.io.loadmat('../data/alternates/mental_rot.mat')

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Aug 15 16:46:34 2023',
 '__version__': '1.0',
 '__globals__': [],
 'Subject': array([[49, 49, 49, ..., 33, 33, 33]], dtype=int64),
 'Trial': array([[ 1,  2,  3, ..., 94, 95, 96]], dtype=int64),
 'Angle': array([[  0, 150, 150, ...,  50, 100,   0]], dtype=int64),
 'Matching': array([[0, 0, 1, ..., 0, 1, 0]], dtype=int64),
 'Response': array([[array(['n'], dtype='<U1'), array(['n'], dtype='<U1'),
         array(['b'], dtype='<U1'), ..., array(['n'], dtype='<U1'),
         array(['b'], dtype='<U1'), array(['n'], dtype='<U1')]],
       dtype=object),
 'Time': array([[3107, 2930, 1874, ..., 1226, 2783, 1017]], dtype=int64),
 'Correct': array([[1, 1, 1, ..., 1, 1, 1]], dtype=int64),
 'Age': array([[32, 32, 32, ..., 20, 20, 20]], dtype=int64),
 'Sex': array([[array(['M'], dtype='<U1'), array(['M'], dtype='<U1'),
         array(['M'], dtype='<U1'), ..., array(['F'], dtype='<U1'),
         array(['F'], dtype='<U1'), array(['F'],

Okay, that's because a dataframe is already "dict-like". 

What about in a record array?

In [26]:
scipy.io.savemat('../data/alternates/mental_rot.mat', df.to_records())
# scipy.io.loadmat('../data/alternates/mental_rot.mat')

AttributeError: recarray has no attribute items

This doesn't work, because `savemat` is expecting a dictionary of variables!  Let's try that.

In [29]:
df.to_records()

rec.array([(   0, 49,  1,   0, 0, 'n', 3107, 1, 32, 'M'),
           (   1, 49,  2, 150, 0, 'n', 2930, 1, 32, 'M'),
           (   2, 49,  3, 150, 1, 'b', 1874, 1, 32, 'M'), ...,
           (5068, 33, 94,  50, 0, 'n', 1226, 1, 20, 'F'),
           (5069, 33, 95, 100, 1, 'b', 2783, 1, 20, 'F'),
           (5070, 33, 96,   0, 0, 'n', 1017, 1, 20, 'F')],
          dtype=[('index', '<i8'), ('Subject', '<i8'), ('Trial', '<i8'), ('Angle', '<i8'), ('Matching', '<i8'), ('Response', 'O'), ('Time', '<i8'), ('Correct', '<i8'), ('Age', '<i8'), ('Sex', 'O')])

In [28]:
scipy.io.savemat('../data/alternates/mental_rot.mat', dict(data=df.to_records()))
scipy.io.loadmat('../data/alternates/mental_rot.mat')


{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Aug 15 16:48:47 2023',
 '__version__': '1.0',
 '__globals__': [],
 'data': array([[(array([[0]], dtype=int64), array([[49]], dtype=int64), array([[1]], dtype=int64), array([[0]], dtype=int64), array([[0]], dtype=int64), array(['n'], dtype='<U1'), array([[3107]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')),
         (array([[1]], dtype=int64), array([[49]], dtype=int64), array([[2]], dtype=int64), array([[150]], dtype=int64), array([[0]], dtype=int64), array(['n'], dtype='<U1'), array([[2930]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')),
         (array([[2]], dtype=int64), array([[49]], dtype=int64), array([[3]], dtype=int64), array([[150]], dtype=int64), array([[1]], dtype=int64), array(['b'], dtype='<U1'), array([[1874]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')

The data is...transposed, I think?

In [31]:
scipy.io.savemat('../data/alternates/mental_rot.mat', dict(data=df.to_records().T))
scipy.io.loadmat('../data/alternates/mental_rot.mat')

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Aug 15 16:49:44 2023',
 '__version__': '1.0',
 '__globals__': [],
 'data': array([[(array([[0]], dtype=int64), array([[49]], dtype=int64), array([[1]], dtype=int64), array([[0]], dtype=int64), array([[0]], dtype=int64), array(['n'], dtype='<U1'), array([[3107]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')),
         (array([[1]], dtype=int64), array([[49]], dtype=int64), array([[2]], dtype=int64), array([[150]], dtype=int64), array([[0]], dtype=int64), array(['n'], dtype='<U1'), array([[2930]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')),
         (array([[2]], dtype=int64), array([[49]], dtype=int64), array([[3]], dtype=int64), array([[150]], dtype=int64), array([[1]], dtype=int64), array(['b'], dtype='<U1'), array([[1874]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')

Nope, that's not correct. 

What if we put a dataframe into the data dict?

In [32]:
scipy.io.savemat('../data/alternates/mental_rot.mat', dict(data=df))
scipy.io.loadmat('../data/alternates/mental_rot.mat')

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Aug 15 16:50:47 2023',
 '__version__': '1.0',
 '__globals__': [],
 'data': array([[array([[49]]), array([[1]]), array([[0]]), ..., array([[1]]),
         array([[32]]), array(['M'], dtype='<U1')],
        [array([[49]]), array([[2]]), array([[150]]), ..., array([[1]]),
         array([[32]]), array(['M'], dtype='<U1')],
        [array([[49]]), array([[3]]), array([[150]]), ..., array([[1]]),
         array([[32]]), array(['M'], dtype='<U1')],
        ...,
        [array([[33]]), array([[94]]), array([[50]]), ..., array([[1]]),
         array([[20]]), array(['F'], dtype='<U1')],
        [array([[33]]), array([[95]]), array([[100]]), ..., array([[1]]),
         array([[20]]), array(['F'], dtype='<U1')],
        [array([[33]]), array([[96]]), array([[0]]), ..., array([[1]]),
         array([[20]]), array(['F'], dtype='<U1')]], dtype=object)}

Interesting, It's now a record array.

In [34]:
scipy.io.savemat('../data/alternates/mental_rot.mat', dict(data=df))
data = scipy.io.loadmat('../data/alternates/mental_rot.mat')
pd.DataFrame(data['data'])

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,[[49]],[[1]],[[0]],[[0]],[n],[[3107]],[[1]],[[32]],[M]
1,[[49]],[[2]],[[150]],[[0]],[n],[[2930]],[[1]],[[32]],[M]
2,[[49]],[[3]],[[150]],[[1]],[b],[[1874]],[[1]],[[32]],[M]
3,[[49]],[[4]],[[100]],[[1]],[b],[[3793]],[[1]],[[32]],[M]
4,[[49]],[[5]],[[50]],[[1]],[b],[[2184]],[[1]],[[32]],[M]
...,...,...,...,...,...,...,...,...,...
5066,[[33]],[[92]],[[150]],[[1]],[b],[[2095]],[[1]],[[20]],[F]
5067,[[33]],[[93]],[[150]],[[0]],[n],[[2125]],[[1]],[[20]],[F]
5068,[[33]],[[94]],[[50]],[[0]],[n],[[1226]],[[1]],[[20]],[F]
5069,[[33]],[[95]],[[100]],[[1]],[b],[[2783]],[[1]],[[20]],[F]


Whoa.

In [38]:
vals = data['data']
vals

array([[array([[49]]), array([[1]]), array([[0]]), ..., array([[1]]),
        array([[32]]), array(['M'], dtype='<U1')],
       [array([[49]]), array([[2]]), array([[150]]), ..., array([[1]]),
        array([[32]]), array(['M'], dtype='<U1')],
       [array([[49]]), array([[3]]), array([[150]]), ..., array([[1]]),
        array([[32]]), array(['M'], dtype='<U1')],
       ...,
       [array([[33]]), array([[94]]), array([[50]]), ..., array([[1]]),
        array([[20]]), array(['F'], dtype='<U1')],
       [array([[33]]), array([[95]]), array([[100]]), ..., array([[1]]),
        array([[20]]), array(['F'], dtype='<U1')],
       [array([[33]]), array([[96]]), array([[0]]), ..., array([[1]]),
        array([[20]]), array(['F'], dtype='<U1')]], dtype=object)

In [40]:
vals.shape

(5071, 9)

Oh, I see.  We lost the `dtypes`. But that didn't happen with record arrays...

In [45]:
scipy.io.savemat('../data/alternates/mental_rot.mat', dict(data=df.to_records()))
data = scipy.io.loadmat('../data/alternates/mental_rot.mat')
data

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Aug 15 16:53:51 2023',
 '__version__': '1.0',
 '__globals__': [],
 'data': array([[(array([[0]], dtype=int64), array([[49]], dtype=int64), array([[1]], dtype=int64), array([[0]], dtype=int64), array([[0]], dtype=int64), array(['n'], dtype='<U1'), array([[3107]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')),
         (array([[1]], dtype=int64), array([[49]], dtype=int64), array([[2]], dtype=int64), array([[150]], dtype=int64), array([[0]], dtype=int64), array(['n'], dtype='<U1'), array([[2930]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')),
         (array([[2]], dtype=int64), array([[49]], dtype=int64), array([[3]], dtype=int64), array([[150]], dtype=int64), array([[1]], dtype=int64), array(['b'], dtype='<U1'), array([[1874]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')

In [51]:
pd.Dataframe(data['data'])

AttributeError: module 'pandas' has no attribute 'Dataframe'

In [52]:
vals = data['data']
vals.shape

(1, 5071)

Oh, geez.  Right, record arrays.

In [53]:
pd.DataFrame.from_records(vals)

ValueError: Per-column arrays must each be 1-dimensional

In [55]:
scipy.io.savemat('../data/alternates/mental_rot.mat', dict(data=df.to_records().T))
data = scipy.io.loadmat('../data/alternates/mental_rot.mat')
data

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Aug 15 16:55:31 2023',
 '__version__': '1.0',
 '__globals__': [],
 'data': array([[(array([[0]], dtype=int64), array([[49]], dtype=int64), array([[1]], dtype=int64), array([[0]], dtype=int64), array([[0]], dtype=int64), array(['n'], dtype='<U1'), array([[3107]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')),
         (array([[1]], dtype=int64), array([[49]], dtype=int64), array([[2]], dtype=int64), array([[150]], dtype=int64), array([[0]], dtype=int64), array(['n'], dtype='<U1'), array([[2930]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')),
         (array([[2]], dtype=int64), array([[49]], dtype=int64), array([[3]], dtype=int64), array([[150]], dtype=int64), array([[1]], dtype=int64), array(['b'], dtype='<U1'), array([[1874]], dtype=int64), array([[1]], dtype=int64), array([[32]], dtype=int64), array(['M'], dtype='<U1')

In [56]:
vals = data['data']
vals.shape

(1, 5071)

In [62]:
pd.DataFrame.from_records(vals.T)

ValueError: Per-column arrays must each be 1-dimensional