-
Notifications
You must be signed in to change notification settings - Fork 0
/
upload_similar_artists_table.py
41 lines (34 loc) · 1.39 KB
/
upload_similar_artists_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# -*- coding: utf-8 -*-
import sys
import myria
# Connection object for the Myria host, with SSL enabled.
connection = myria.MyriaConnection(
    hostname='rest.myria.cs.washington.edu',
    port='1776',
    ssl=True,
)

# The destination relation has three columns, all typed as strings.
column_names = ['song_id', 'artist_id', 'similar_artist']
column_types = ['STRING_TYPE'] * 3
schema_spec = {"columnTypes": column_types, "columnNames": column_names}
schema = myria.MyriaSchema(schema_spec)
# Disabled alternative: hand the raw dict over instead of a MyriaSchema.
#schema = {"columnTypes" : columnTypes, "columnNames" : columnNames}

# Relation that the import below writes into.
destination = myria.MyriaRelation(
    "Jeremy:MSD:SimilarArtistsTable",
    schema=schema,
    connection=connection,
)
# Build the list of (index, file URL) pairs that is handed to the parallel
# import: indices 1..26 point at the per-letter TSV files, indices 27..72
# all point at one shared placeholder file.
# NOTE(review): the indices are presumably Myria worker ids - confirm.

# One input sub-directory per letter A-Z.
directories = ['A', 'B', 'C', 'D', 'E', 'F', 'G',
               'H', 'I', 'J', 'K', 'L', 'M', 'N',
               'O', 'P', 'Q', 'R', 'S', 'T', 'U',
               'V', 'W', 'X', 'Y', 'Z']
filename_header = 'hdfs://vega:8020/user/hyrkas/msd_data/'
# Disabled alternative root (no explicit namenode host/port):
#filename_header = 'hdfs:///user/hyrkas/msd_data/'
filename = 'similar_artist_table.tsv'

# Pair each directory with a 1-based index; enumerate keeps the index range
# tied to the directory list instead of a separately hard-coded range.
upload_files = []
for i, directory in enumerate(directories, 1):
    f = filename_header + directory + '/' + filename
    upload_files.append((i, f))

# Every remaining index up to and including 72 gets the same placeholder file.
# Disabled alternative (plain path instead of a file:// URL):
#fakefile = '/home/hyrkas/fakefile.tsv'
fakefile = 'file:///home/hyrkas/fakefile.tsv'
for i in range(len(directories) + 1, 73):
    upload_files.append((i, fakefile))

# Parenthesised form prints the same text on Python 2 and also runs on Python 3.
print(upload_files)
# Scan options passed along with the import: tab delimiter, empty quote string.
scan_parameters = dict(delimiter='\t', quote='')

# Issue the parallel import of all (index, file) pairs into the destination
# relation.
query = myria.MyriaQuery.parallel_import(
    destination, upload_files, scan_parameters=scan_parameters)

# Disabled debugging aid:
#print query.to_dataframe()