here we will:
1. convert the proposal `date` field to a datetime type
2. remove the 'platform' column from the proposal table; this is redundant as we have this field on the deployment table; this is a property of deployments, not of proposals
3. reduce the precision of the proposal `date` field to seconds

In [1]:
# Parquet file the proposals table is read from.
IN_FILE = '../../parquets/proposals.parquet'
# The result is written back to the very same path, i.e. the input is overwritten.
OUT_FILE = IN_FILE

In [2]:
# Load the proposals table from IN_FILE into an Arrow table and display it.
import pyarrow as pa
import pyarrow.parquet as pq

proposals = pq.read_table(source=IN_FILE)
proposals

pyarrow.Table
id: string
deployment_id: string
platform_proposal_id: string
author: string
date: timestamp[ns]
votes_count: int64
platform: string
title: string
description: string
start: timestamp[ns]
end: timestamp[ns]
----
id: [["63752d1d-5678-5156-80b3-aa4a52ca399f","ba7ae0e4-261e-53d6-8ffb-338a9b78fe99","4664058c-09d3-57de-8db7-24590e824bd8","b7e51f59-dc39-5326-8d09-5a622159c631","c378293a-4a49-5919-9341-12daca894e40",...,"f3aff0b2-b5f2-5b1f-aff1-35af7ab6274a","bc86cac1-1d4c-5f13-8a1f-db3aab1436bf","054284bf-1b65-507b-9f64-12773c61900c","2267277e-eb9e-59bd-9b5a-6a7a834231f6","c97b8c8f-ae21-594b-9474-b3a53a6f4797"],["7b64ad83-a5d3-57b3-ac2a-5e76ab6dce01","caf02ea1-8e27-5afa-aff6-acf143258ee6","299e2ee8-c0a1-52ff-9a92-15e44dd8c0d0","3b0ca426-28cd-58d0-9868-85ec10131261","1d3b2ad8-158e-5893-a143-eb20d456ca02",...,"d651a17a-3573-5bc6-bb82-33c64d1eb722","06939d5f-4308-595f-bbc1-ee13972015fa","f036d639-9064-57d3-9e6d-85ec3b5e7be2","30186bed-d4e9-58eb-bb0e-3d3178c2928d","b4ae4379-753d-5b

In [3]:
# Drop the redundant 'platform' column (the 7th column of the loaded table).
# The column name is passed as a list, which every pyarrow version accepts.
# The guard keeps the cell re-runnable: OUT_FILE is the same path as IN_FILE,
# so once the notebook has been run the file on disk no longer has the column.
if 'platform' in proposals.column_names:
    proposals = proposals.drop(['platform'])

In [4]:
import pandas as pd

# Move the Arrow table into pandas and run 'date' through pd.to_datetime.
# NOTE(review): the loaded schema already reports `date` as timestamp[ns], so
# this conversion looks like a no-op — confirm before removing it.
proposals_df = proposals.to_pandas()
proposals_df = proposals_df.assign(
    date=pd.to_datetime(proposals_df['date'], format='%Y-%m-%d')
)
proposals_df.head()

Unnamed: 0,id,deployment_id,platform_proposal_id,author,date,votes_count,title,description,start,end
0,63752d1d-5678-5156-80b3-aa4a52ca399f,1ff410f5-1c80-53cd-9558-d93d1f6de121,0x0025c38d987acba1f1d446d3690384327ebe06d15f1f...,0xbaaea72417f4dc3e0f52a1783b0913d0f3516634,2019-10-22 10:07:58,8,Co-fund ETHGlobal ongoing dHack @ ETHWaterloo,# ETHGlobal ongoing dHack - ETHWaterloo\n\n\n#...,2019-10-22 10:07:58,2019-10-27 12:44:18
1,ba7ae0e4-261e-53d6-8ffb-338a9b78fe99,d16f1646-d170-5b57-8812-e43de6b87597,0x0033a337b620add50be9ba0fff1128ab866ece51cfdd...,0x052c68abe8e4bf0b78925e488b98f6fdc18a3af9,2020-01-17 19:07:19,1,second reputation request,second reputation request,2020-01-17 19:07:19,2020-01-17 19:39:16
2,4664058c-09d3-57de-8db7-24590e824bd8,d3229595-ac4c-55bf-9808-386fc5603bd1,0x003c109411e96e5607ce609278536262e12afdaa65ab...,0xb33b9fba681653fe263b31a95766d83d18c2128d,2020-03-14 17:52:32,5,dxwallet.eth set resolver contract,Setting ENS resolver contracts for dxDAO domai...,2020-03-14 17:52:32,2020-03-22 19:54:21
3,b7e51f59-dc39-5326-8d09-5a622159c631,b40189eb-f419-5cb0-b671-d61fe7ff9972,0x0071c649968f7939fc9729c5dc25a8ae3f50dc86220a...,0x86fd6dd41bad636b5b3b9228bc5642fa0df392e8,2019-12-14 14:52:54,1,Reputation Request James Simbouras,"Dear FestDAOists\n\nI am James Simbouras,\nAs ...",2019-12-14 14:52:54,2019-12-18 21:57:59
4,c378293a-4a49-5919-9341-12daca894e40,9fb971b0-de3e-5651-96cf-ad330a9c660b,0x007eaf0fc8d6ae35ba3f28bc7cf4b34af659db8161d4...,0x7cb9032844e722bef2e16a761832c59c96550898,2019-11-01 19:26:29,1,Polkadot wiki translation to spanish (learn-st...,"Translation form English to Spanish, +2500 wor...",2019-11-01 19:26:29,2019-11-06 09:25:53


In [5]:
# Inspect the pandas dtype of every column of the converted frame.
proposals_df.dtypes

id                              object
deployment_id                   object
platform_proposal_id            object
author                          object
date                    datetime64[ns]
votes_count                      int64
title                           object
description                     object
start                   datetime64[ns]
end                     datetime64[ns]
dtype: object

In [6]:
# Convert the pandas frame back into an Arrow table and show its schema.
# pyarrow is already imported as `pa` by the cell that loads the parquet file,
# so it is not imported a second time here.
proposals_table = pa.Table.from_pandas(proposals_df)
proposals_table.schema

id: string
deployment_id: string
platform_proposal_id: string
author: string
date: timestamp[ns]
votes_count: int64
title: string
description: string
start: timestamp[ns]
end: timestamp[ns]
-- schema metadata --
pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, "' + 1448

In [7]:
# Sanity check on the column layout: 'date' is expected at position 4.
assert proposals_table.column_names[4] == 'date', (
    f"expected 'date' at column index 4, found {proposals_table.column_names[4]!r}"
)

In [8]:
# Reduce the precision of the 'date' column to whole seconds.
# The column is looked up by name rather than by a hard-coded position, so the
# right column is replaced even if the column order changes.
# `column('date')` raises KeyError if the column is missing.
date_idx = proposals_table.schema.get_field_index('date')
date_seconds = proposals_table.column('date').cast(pa.timestamp('s'))
proposals_table = proposals_table.set_column(date_idx, 'date', date_seconds)
proposals_table.schema

id: string
deployment_id: string
platform_proposal_id: string
author: string
date: timestamp[s]
votes_count: int64
title: string
description: string
start: timestamp[ns]
end: timestamp[ns]
-- schema metadata --
pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, "' + 1448

In [9]:
# Display the table (schema plus a preview of the column data) after the cast.
proposals_table

pyarrow.Table
id: string
deployment_id: string
platform_proposal_id: string
author: string
date: timestamp[s]
votes_count: int64
title: string
description: string
start: timestamp[ns]
end: timestamp[ns]
----
id: [["63752d1d-5678-5156-80b3-aa4a52ca399f","ba7ae0e4-261e-53d6-8ffb-338a9b78fe99","4664058c-09d3-57de-8db7-24590e824bd8","b7e51f59-dc39-5326-8d09-5a622159c631","c378293a-4a49-5919-9341-12daca894e40",...,"d651a17a-3573-5bc6-bb82-33c64d1eb722","06939d5f-4308-595f-bbc1-ee13972015fa","f036d639-9064-57d3-9e6d-85ec3b5e7be2","30186bed-d4e9-58eb-bb0e-3d3178c2928d","b4ae4379-753d-5be4-9577-fb6886374e0b"]]
deployment_id: [["1ff410f5-1c80-53cd-9558-d93d1f6de121","d16f1646-d170-5b57-8812-e43de6b87597","d3229595-ac4c-55bf-9808-386fc5603bd1","b40189eb-f419-5cb0-b671-d61fe7ff9972","9fb971b0-de3e-5651-96cf-ad330a9c660b",...,"306b521f-8cb7-5661-af97-fef9c7bc0189","306b521f-8cb7-5661-af97-fef9c7bc0189","306b521f-8cb7-5661-af97-fef9c7bc0189","306b521f-8cb7-5661-af97-fef9c7bc0189","306b521f-8cb7-566

In [10]:
# Write the transformed table to OUT_FILE. OUT_FILE is the same path as
# IN_FILE, so this overwrites the file that was read at the start.
# Use the `pq` alias from the explicit `import pyarrow.parquet as pq` instead
# of reaching through `pa.parquet`, which only resolves as a side effect of
# that import.
pq.write_table(proposals_table, OUT_FILE)