# Turn vector csv into npy

In [1]:
# Basic import
import os
import sys
import json
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Helper function
def writeProgress(msg, count, total):
    """Write an in-place progress indicator to stdout.

    Emits ``msg`` followed by ``count / total`` rendered as a percentage with
    two decimals and a trailing carriage return (no newline), so the next
    call overwrites the same terminal line. The stream is flushed right away.

    Args:
        msg: Text written in front of the percentage.
        count: Number of items handled so far.
        total: Total number of items. When it is 0 the ratio is undefined,
            so 0.00% is shown instead of raising ``ZeroDivisionError``.
    """
    ratio = count / total if total else 0.0
    sys.stdout.write(msg + "{:.2%}\r".format(ratio))
    sys.stdout.flush()
    
def newPath(path):
    """Create the directory ``path`` if it does not exist yet.

    Missing parent directories are created as well, and an already existing
    directory is left untouched. Using ``exist_ok=True`` avoids the race
    between a separate "is it there?" check and the creation itself.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

def read_json(src_path):
    """Load and return the JSON document stored at ``src_path``.

    The file is decoded as UTF-8 (the standard JSON encoding) instead of the
    platform's locale encoding, so non-ASCII content reads the same on every
    machine.

    Args:
        src_path: Path of the JSON file to read.

    Returns:
        The decoded Python object (dict, list, str, number, bool or None).
    """
    with open(src_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

def write_json(data, dst_path):
    """Serialize ``data`` as JSON into the file at ``dst_path``.

    The destination is opened in write mode, so an existing file is
    overwritten.
    """
    with open(dst_path, mode='w') as sink:
        json.dump(data, sink)

def writeLog(row):
    """Append ``row`` followed by a newline to ``log.txt``.

    The log file is resolved against the current working directory and is
    opened in append mode, so earlier entries are kept.
    """
    with open('log.txt', mode='a') as log_file:
        entry = row + '\n'
        log_file.write(entry)

def getErrMsg(e):
    error_class = e.__class__.__name__ #取得錯誤類型
    detail = e.args[0] #取得詳細內容
    errMsg = "[{}] {}".format(error_class, detail)
    return errMsg

# Feature

## Text

In [None]:
PATH = '../Text/output/TextFeatureVec.csv'
SAVE_NAME = 'text'

In [None]:
df = pd.read_csv(PATH, header = None)
df

In [None]:
df = df.loc[:, 1:]
df

In [None]:
npy = df.to_numpy()
print(npy.shape, npy)

In [None]:
np.save('./npy/' + SAVE_NAME + '.npy', npy)

## Image

### Poster

In [None]:
PATH = '../Image/Sample/output/PosterFeatureVec.csv'
SAVE_NAME = 'poster'

In [None]:
df = pd.read_csv(PATH, header = None)
df

In [None]:
df = df.loc[:, 1:]
df

In [None]:
poster_npy = df.to_numpy()
print(poster_npy.shape, poster_npy)

In [None]:
# np.save('./npy/' + SAVE_NAME + '.npy', poster_npy)

### IG

In [None]:
PATH = '../Image/Sample/output/IGimgFeatureVec.csv'
SAVE_NAME = 'IGimg'

In [None]:
df = pd.read_csv(PATH, header = None)
df

In [None]:
df['username'] = [string.split('_')[0] for string in df[0]]
df

In [None]:
group = df.groupby('username').mean()
group.to_csv('../Image/Sample/output/IGimgFeatureVec_group.csv')
group

In [None]:
IG_npy = group.to_numpy()
print(IG_npy.shape, IG_npy)

In [None]:
# np.save('./npy/' + SAVE_NAME + '.npy', IG_npy)

### Merge image feature

In [None]:
SAVE_NAME = "image"

In [None]:
npy = (poster_npy + IG_npy) / 2
print(npy.shape, npy)

In [None]:
np.save('./npy/' + SAVE_NAME + '.npy', npy)

## Video

In [None]:
PATH = '../Video/csv/computed_minmaxnorm_2019_log.csv'
SAVE_NAME = 'video'

In [None]:
df = pd.read_csv(PATH)
df

In [None]:
df = df.iloc[:, 9:]
df

In [None]:
npy = df.to_numpy()
print(npy.shape, npy)

In [None]:
np.save('./npy/' + SAVE_NAME + '.npy', npy)

## Follow

In [3]:
# Source CSV for the "follow" features and the base name of the .npy output.
PATH = '../Autoencoder/output/grid/80-10dim.csv'
SAVE_NAME = 'follow'

In [4]:
# Read with the first line as header; the recorded output below shows a
# 'movie' column followed by feature_0 .. feature_9.
df = pd.read_csv(PATH)
df

Unnamed: 0,movie,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9
0,21bridgesmovie,1.313291,1.413283,1.544461,1.886759,1.728442,0.000000,1.137120,1.510174,0.978163,1.822727
1,47metersdown,0.837493,0.905968,1.373671,1.346066,1.459658,0.000000,0.565606,1.517898,0.007892,1.391771
2,abeautifuldaymovie,1.062834,1.609145,1.423535,1.532653,1.381293,0.000000,1.040888,1.345244,0.888910,1.047465
3,abominablemovie,1.119693,1.546281,0.241988,1.136334,1.616928,0.000000,0.707046,0.821275,0.929626,0.747105
4,adastramovie,1.365520,0.578039,1.282238,1.432905,1.270007,0.094023,0.953967,1.771913,0.411324,1.990719
...,...,...,...,...,...,...,...,...,...,...,...
160,wrinklestheclown,0.751404,0.784730,0.843319,0.931058,0.829312,0.000000,0.719121,0.997329,0.524739,0.931197
161,xmenmovies,0.680980,1.059782,1.427396,1.851414,1.099045,0.000000,1.039739,0.825713,0.891358,1.306172
162,yardiefilm,1.058341,1.289066,1.350711,1.435796,1.171857,0.000000,1.037003,1.555916,0.771497,1.444147
163,yesterdaymovie,1.181066,1.273515,1.553790,1.594873,1.626290,0.000000,0.995866,1.313411,0.817974,1.134134


In [5]:
# Drop the first column by position; per the recorded outputs this removes
# 'movie' and leaves only the feature_* columns.
df = df.iloc[:, 1:]
df

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9
0,1.313291,1.413283,1.544461,1.886759,1.728442,0.000000,1.137120,1.510174,0.978163,1.822727
1,0.837493,0.905968,1.373671,1.346066,1.459658,0.000000,0.565606,1.517898,0.007892,1.391771
2,1.062834,1.609145,1.423535,1.532653,1.381293,0.000000,1.040888,1.345244,0.888910,1.047465
3,1.119693,1.546281,0.241988,1.136334,1.616928,0.000000,0.707046,0.821275,0.929626,0.747105
4,1.365520,0.578039,1.282238,1.432905,1.270007,0.094023,0.953967,1.771913,0.411324,1.990719
...,...,...,...,...,...,...,...,...,...,...
160,0.751404,0.784730,0.843319,0.931058,0.829312,0.000000,0.719121,0.997329,0.524739,0.931197
161,0.680980,1.059782,1.427396,1.851414,1.099045,0.000000,1.039739,0.825713,0.891358,1.306172
162,1.058341,1.289066,1.350711,1.435796,1.171857,0.000000,1.037003,1.555916,0.771497,1.444147
163,1.181066,1.273515,1.553790,1.594873,1.626290,0.000000,0.995866,1.313411,0.817974,1.134134


In [6]:
# Convert the remaining columns to a NumPy array and show its shape/content
# (the recorded run printed shape (165, 10)).
npy = df.to_numpy()
print(npy.shape, npy)

(165, 10) [[1.3132908  1.4132833  1.5444614  ... 1.5101745  0.97816306 1.8227268 ]
 [0.8374928  0.9059681  1.3736712  ... 1.5178976  0.00789221 1.3917707 ]
 [1.062834   1.6091454  1.4235351  ... 1.3452443  0.8889104  1.0474652 ]
 ...
 [1.0583408  1.2890663  1.350711   ... 1.5559162  0.77149683 1.4441472 ]
 [1.1810656  1.2735145  1.5537901  ... 1.3134111  0.8179738  1.1341342 ]
 [0.15627183 3.2025208  1.2557362  ... 0.30579287 1.5426961  0.19550432]]


In [7]:
# np.save does not create missing directories, so make sure ./npy exists.
newPath('./npy')
np.save('./npy/' + SAVE_NAME + '.npy', npy)

## Social

In [None]:
PATH = '../Social/social_norm.csv'
SAVE_NAME = 'social'

In [None]:
df = pd.read_csv(PATH)
df

In [None]:
df = df.iloc[:, 9:]
df

In [None]:
npy = df.to_numpy()
print(npy.shape, npy)

In [None]:
np.save('./npy/' + SAVE_NAME + '.npy', npy)

# Movie genre

In [None]:
PATH = '../genresMat.csv'
SAVE_NAME = 'movie_genre'

In [None]:
df = pd.read_csv(PATH)
df

In [None]:
df = df.iloc[:, 1:]
df

In [None]:
npy = df.to_numpy()
print(npy.shape, npy)

In [None]:
np.save('./npy/' + SAVE_NAME + '.npy', npy)

# User preference

## User following

In [None]:
PATH = '../User/output/UserFollowingsMat.csv'
SAVE_NAME = 'user_followings'

In [None]:
df = pd.read_csv(PATH, header = None)
df

In [None]:
df = df.loc[1:, 1:]
df

In [None]:
npy = df.to_numpy().astype(int)
print(npy.shape, npy)

In [None]:
np.save('./npy/' + SAVE_NAME + '.npy', npy)

## User genre

In [None]:
SAVE_NAME = 'user_genre'

In [None]:
user_followings = np.load('./npy/user_followings.npy')
movie_genre = np.load('./npy/movie_genre.npy')

In [None]:
user_followings = user_followings.astype(int)
movie_genre = movie_genre.astype(int)

In [None]:
user_genre = user_followings.dot(movie_genre)

In [None]:
print(user_genre.shape)

In [None]:
df = pd.DataFrame(user_genre)
df

In [None]:
npy = df.to_numpy()
print(npy.shape, npy)

In [None]:
np.save('./npy/' + SAVE_NAME + '.npy', npy)

In [None]:
columns = read_json('../orderedListGenres.json')
print(len(columns), columns)
users = read_json('../orderedListUsers.json')
print(len(users), users)

In [None]:
df.columns = columns
df.index = users
df

In [None]:
df.to_csv('../User/output/UserGenreMat.csv')

# Merge all features

In [8]:
# Load the per-modality feature matrices written by the sections above.
# poster.npy and IGimg.npy are deliberately not loaded here; only the
# combined image.npy is used.
text_npy, image_npy, video_npy, follow_npy, social_npy = [
    np.load('./npy/' + stem + '.npy')
    for stem in ('text', 'image', 'video', 'follow', 'social')
]

# Report each matrix's shape, one line per modality.
print('Text: {}'.format(text_npy.shape))
print('Image: {}'.format(image_npy.shape))
print('Video: {}'.format(video_npy.shape))
print('Follow: {}'.format(follow_npy.shape))
print('Social: {}'.format(social_npy.shape))

Text: (165, 300)
Image: (165, 2048)
Video: (165, 8)
Follow: (165, 10)
Social: (165, 6)


In [9]:
npys = [image_npy, video_npy, follow_npy, social_npy]

In [10]:
tmp = text_npy

for npy in npys:
    tmp = np.hstack([tmp, npy])
    
print(tmp.shape)

(165, 2372)


In [11]:
SAVE_NAME = 'all_2372'

In [12]:
np.save('./npy/' + SAVE_NAME + '.npy', tmp)