# 開始實驗

我們以較小型的設定做為例子，模擬2018/7/1上午前6小時10000名觀眾的收視行為。以下為方便起見只保留出現密度較高的觀眾數據呈現；同時詳細的實驗產生程式碼定義於*exp*套件中。

In [1]:
from exp import Audience, Experiment
import datetime

myExp = Experiment(
    id = 1, 
    audNum = 10000, 
    startTime = datetime.datetime(2018, 7, 1, 2, 0), 
    endTime = datetime.datetime(2018, 7, 1, 8, 0)
)
myExp.startExp()

# 結果分析

我們選取資料密度(收視時間)較多的前100名觀眾作為分析資料來源。表格中的數據即代表當下該觀眾正在觀看的頻道。

In [2]:
import pandas as pd

channels = []
for aud in myExp.audiences:
    channels.append(list(aud.getRec().values()))
    if len(channels) > 100: break

index = {}
for idx, t in enumerate(list(myExp.audiences[0].getRec().keys())):
    index[idx] = t.strftime("%Y-%m-%d %H:%M:%S")
        
cTable = pd.DataFrame(channels).transpose()
cTable.rename(index=index, inplace=True)
cTable

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
2018-07-01 02:00:00,74,70,66,70,41,58,78,79,900,73,...,55,63,66,55,911,55,64,66,69,62
2018-07-01 02:15:00,80,77,79,69,77,51,68,77,66,65,...,8,68,62,43,900,61,900,58,69,76
2018-07-01 02:30:00,52,6,69,70,900,64,69,9,79,66,...,71,69,69,313,911,51,312,52,70,55
2018-07-01 02:45:00,900,94,907,94,900,94,94,69,94,94,...,94,94,70,69,61,94,94,94,94,55
2018-07-01 03:00:00,69,905,32,900,66,66,68,66,32,69,...,74,76,6,313,63,64,84,72,79,69
2018-07-01 03:15:00,76,61,55,72,62,9,311,911,311,63,...,311,900,81,52,64,900,58,57,51,58
2018-07-01 03:30:00,76,51,55,63,69,237,65,54,62,43,...,313,76,310,29,78,72,55,900,63,75
2018-07-01 03:45:00,68,72,314,71,61,77,68,62,50,66,...,63,77,908,54,55,89,66,314,314,314
2018-07-01 04:00:00,71,69,69,44,65,76,249,71,53,56,...,8,36,64,52,62,9,61,89,249,69
2018-07-01 04:15:00,49,92,54,77,71,91,66,61,73,39,...,911,55,900,63,310,71,69,903,9,82


我們可以利用凱擘所提供的API將頻道號碼轉換為其名稱，方便進行下一步觀察。

In [3]:
import requests

channelMapper = {}
res = requests.get("http://175.98.115.29/hkton/chdata.php").json()["data"]
for r in res:
    channelMapper[r["ch"]] = r["cn"]
    
for col in cTable.columns:
    cTable[col].replace(channelMapper, inplace=True)

cTable

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
2018-07-01 02:00:00,FOX SPORTS 2 HD,CINEMAX HD,東森洋片台HD,CINEMAX HD,八大戲劇台HD,非凡新聞HD,國興衛視HD,FOX HD,900,FOX SPORTS HD,...,TVBS新聞台HD,緯來電影台,東森洋片台HD,TVBS新聞台HD,911,TVBS新聞台HD,LS龍祥電影台,東森洋片台HD,FOX MOVIES HD,東森電影台HD
2018-07-01 02:15:00,JET綜合台HD,緯來日本台HD,FOX HD,FOX MOVIES HD,緯來日本台HD,東森新聞HD,好萊塢電影台,緯來日本台HD,東森洋片台HD,HBO,...,台視HD,好萊塢電影台,東森電影台HD,緯來戲劇台HD,900,衛視電影台HD,900,非凡新聞HD,FOX MOVIES HD,Win 綜合台HD
2018-07-01 02:30:00,中天新聞HD,民視HD,FOX MOVIES HD,CINEMAX HD,900,LS龍祥電影台,FOX MOVIES HD,大愛HD,FOX HD,東森洋片台HD,...,緯來育樂台,FOX MOVIES HD,FOX MOVIES HD,Arirang TV,911,東森新聞HD,BBC世界新聞台,中天新聞HD,CINEMAX HD,TVBS新聞台HD
2018-07-01 02:45:00,900,誠心電視台HD,907,誠心電視台HD,900,誠心電視台HD,誠心電視台HD,FOX MOVIES HD,誠心電視台HD,誠心電視台HD,...,誠心電視台HD,誠心電視台HD,CINEMAX HD,FOX MOVIES HD,衛視電影台HD,誠心電視台HD,誠心電視台HD,誠心電視台HD,誠心電視台HD,TVBS新聞台HD
2018-07-01 03:00:00,FOX MOVIES HD,905,東森綜合台HD,900,東森洋片台HD,東森洋片台HD,好萊塢電影台,東森洋片台HD,東森綜合台HD,FOX MOVIES HD,...,FOX SPORTS 2 HD,Win 綜合台HD,民視HD,Arirang TV,緯來電影台,LS龍祥電影台,鴻基台灣台,緯來體育台HD,FOX HD,FOX MOVIES HD
2018-07-01 03:15:00,Win 綜合台HD,衛視電影台HD,TVBS新聞台HD,緯來體育台HD,東森電影台HD,大愛HD,CNBC Asia,911,CNBC Asia,緯來電影台,...,CNBC Asia,900,Z頻道,中天新聞HD,LS龍祥電影台,900,非凡新聞HD,東森財經新聞HD,東森新聞HD,非凡新聞HD
2018-07-01 03:30:00,Win 綜合台HD,東森新聞HD,TVBS新聞台HD,緯來電影台,FOX MOVIES HD,HBO原創鉅獻,HBO,三立新聞HD,東森電影台HD,緯來戲劇台HD,...,Arirang TV,Win 綜合台HD,彭博財經頻道,三立台灣台HD,國興衛視HD,緯來體育台HD,TVBS新聞台HD,900,緯來電影台,ELEVEN SPORTS 1 HD
2018-07-01 03:45:00,好萊塢電影台,緯來體育台HD,德國之聲電視台,緯來育樂台,衛視電影台HD,緯來日本台HD,好萊塢電影台,東森電影台HD,年代新聞HD,東森洋片台HD,...,緯來電影台,緯來日本台HD,908,三立新聞HD,TVBS新聞台HD,三立iNEWS HD,東森洋片台HD,德國之聲電視台,德國之聲電視台,德國之聲電視台
2018-07-01 04:00:00,緯來育樂台,FOX MOVIES HD,FOX MOVIES HD,高點電視台HD,HBO,Win 綜合台HD,博斯運動二台,緯來育樂台,民視新聞台HD,TVBS HD,...,台視HD,中天綜合台HD,LS龍祥電影台,中天新聞HD,東森電影台HD,大愛HD,衛視電影台HD,三立iNEWS HD,博斯運動二台,FOX MOVIES HD
2018-07-01 04:15:00,壹電視新聞,運通財經台,三立新聞HD,緯來日本台HD,緯來育樂台,中華財經台,東森洋片台HD,衛視電影台HD,FOX SPORTS HD,中天娛樂台HD,...,911,TVBS新聞台HD,900,緯來電影台,彭博財經頻道,緯來育樂台,FOX MOVIES HD,903,大愛HD,ANIMAX


我們亦可以隨機選擇一個觀眾作為分析對象，更進一步觀察其所觀看的節目為何，並配合資策會的社群聲量API進行交叉比對。

In [4]:
from data import Channel, Show, session

audShow = []
sampleRec = myExp.audiences[0].getRec()
for rec in sampleRec:
    shs = pd.read_sql(session.query(Show.dt, Show.tm, Show.dur, Show.p).filter(Show.channel == sampleRec[rec]).filter(Show.dt == rec.strftime("%Y%m%d")).statement, session.bind)
    columns = Show.mapToColNames(list(shs.columns))
    shs.columns = columns

    start = []
    for dt in zip(shs["startDate"], shs["startTime"]):
        d = "{0} {1}".format(dt[0], dt[1])
        start.append(datetime.datetime.strptime(d, "%Y%m%d %H%M%S"))

    del shs["startDate"]
    del shs["startTime"]
    shs["start"] = start
    if len(list(shs[shs["start"] < rec]["showName"])) == 0: audShow.append("")
    else: audShow.append(list(shs[shs["start"] < rec]["showName"])[0])
        
audDf = pd.DataFrame([list(cTable[0]), audShow]).transpose()
audDf.rename(index=index, inplace=True)
audDf

Unnamed: 0,0,1
2018-07-01 02:00:00,FOX SPORTS 2 HD,
2018-07-01 02:15:00,JET綜合台HD,命運好好玩精華版
2018-07-01 02:30:00,中天新聞HD,文茜世界周報
2018-07-01 02:45:00,900,
2018-07-01 03:00:00,FOX MOVIES HD,即刻救援2
2018-07-01 03:15:00,Win 綜合台HD,四端紅人會
2018-07-01 03:30:00,Win 綜合台HD,四端紅人會
2018-07-01 03:45:00,好萊塢電影台,飢餓遊戲
2018-07-01 04:00:00,緯來育樂台,風水!有關係 第26季:20
2018-07-01 04:15:00,壹電視新聞,新聞深呼吸:73


搭配繪圖工具，則可以更進一步將模擬的結果繪出。以下我們以本次實驗時段的頻道熱門程度為例，可以方便觀察各個頻道收視人數在該次模擬下的變化情形。

In [6]:
import matplotlib.pyplot as plt
%matplotlib notebook

cDict = dict()
cList = cTable.values.tolist()
for c in cList:
    for cc in c:
        cDict[cc] = []
        
for idx, c in enumerate(cList):
    for cc in c:
        if len(cDict[cc]) != idx + 1:
            cDict[cc].append(1)
        else:
            cDict[cc][idx] += 1
    for cD in cDict:
        if len(cDict[cD]) != idx +1:
            cDict[cD].append(0)

for cD in cDict:
    try: plt.plot(list(range(24)), cDict[cD], label=next(key for key, value in channelMapper.items() if value == cD))
    except: plt.plot(list(range(24)), cDict[cD], label=cD)

plt.xlabel("Time")
plt.ylabel("Number of people watching")
plt.legend()
plt.grid(True)
plt.plot()

<IPython.core.display.Javascript object>

[]

我們亦可以利用上述資料，求出轉移矩陣(Sparse Matrix Form)

In [39]:
import numpy as np

count = 0
channelList = list(channelMapper.values())
channelList.append("其他")
tMatrix = np.zeros((len(channelList), len(channelList)))
npcList = np.array(cList).T
for lis in npcList:
    for i in range(len(lis)-1):
        try:
            x = channelList.index(lis[i])
        except:
            x = len(channelList)-1
        try:
            y = channelList.index(lis[i+1])
        except:
            y = len(channelList)-1
        
        tMatrix[x][y] += 1
        count += 1

result = []
_x, _y = np.nonzero(tMatrix)
for i in range(_x.shape[0]):
    result.append([_x[i], _y[i], tMatrix[_x[i]][_y[i]]/count])
    
rdf = pd.DataFrame(result)
rdf

Unnamed: 0,0,1,2
0,21,138,0.000430
1,41,55,0.000430
2,44,146,0.000430
3,55,104,0.000430
4,76,155,0.000430
5,85,173,0.000430
6,86,138,0.000430
7,86,146,0.000861
8,89,143,0.000430
9,89,154,0.000430


In [40]:
rdf.describe()

Unnamed: 0,0,1,2
count,1226.0,1226.0,1226.0
mean,143.995922,144.131321,0.000816
std,19.950039,20.025736,0.000746
min,21.0,21.0,0.00043
25%,133.0,133.0,0.00043
50%,145.0,146.0,0.00043
75%,156.0,157.0,0.000861
max,178.0,178.0,0.007318
