### Load packages

In [1]:
import numpy as np
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt

In [48]:
# Create the conda environment if it has not been created before.
#!conda create --name sna --file environment.yml

### Load and concatenate data

In [34]:
# Load the raw ';'-separated CSV exports. Postings and votes are each split
# into two files (file names indicate 01-15 and 16-31 May 2019); the
# follow/ignore relationships come as a single file for the whole month.
postings_1 = pd.read_csv('input/Postings_01052019_15052019.csv', sep=';')
postings_2 = pd.read_csv('input/Postings_16052019_31052019.csv', sep=';')
votes_1 = pd.read_csv('input/Votes_01052019_15052019.csv', sep=';')
votes_2 = pd.read_csv('input/Votes_16052019_31052019.csv', sep=';')
follow_ignore = pd.read_csv(
    "input/Following_Ignoring_Relationships_01052019_31052019.csv", sep=";")

# Stack the two half-month files into one frame each.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# part keeps its own 0..k index and the combined frame has duplicate row
# labels, which makes label-based lookups (.loc) return several rows.
postings = pd.concat([postings_1, postings_2], ignore_index=True)
votes = pd.concat([votes_1, votes_2], ignore_index=True)


In [18]:
# Preview the first two rows of the combined votes frame.
votes.head(2)

Unnamed: 0,ID_CommunityIdentity,ID_Posting,VoteNegative,VotePositive,VoteCreatedAt,UserCommunityName,UserGender,UserCreatedAt
0,675862,1041076570,1,0,2019-05-06 16:47:46.883,Heckscheibenwischer,m,2018-06-26 06:04:30.513
1,689023,1041076570,1,0,2019-05-01 22:19:06.240,Heinz Fettleber,,2019-03-08 21:23:11.463


In [51]:
# Preview the first two rows of the combined postings frame.
postings.head(2)

Unnamed: 0,ID_Posting,ID_Posting_Parent,ID_CommunityIdentity,PostingHeadline,PostingComment,PostingCreatedAt,ID_Article,ArticlePublishingDate,ArticleTitle,ArticleChannel,ArticleRessortName,UserCommunityName,UserGender,UserCreatedAt
0,1041073586,1041073000.0,671476,Das hat gestern bereits der Voggenhuber angefü...,schieder hatte dem inhaltlich nichts entgegenz...,2019-05-01 18:21:15.127,2000102330973,2019-05-01 10:28:57.49,1. Mai in Wien: SPÖ fordert von Strache Rücktritt,Inland,Parteien,Ravenspower,,2018-04-14 13:42:28.470
1,1041073839,1041073000.0,566938,,...und meinen Bezirk bekommst du als Erbe mit.,2019-05-01 18:28:22.040,2000102330973,2019-05-01 10:28:57.49,1. Mai in Wien: SPÖ fordert von Strache Rücktritt,Inland,Parteien,AlphaRomeo,m,2015-08-28 17:07:41.110


## Introduction

We want to answer the question of whether selection and social influence play a role after users first had contact in the Standard forum. To do so, we need to determine the first contact between two users. This can be done in two ways:
1. Reply: user a and b first had contact when user a replied to a comment of user b
2. Vote: user a upvoted a comment of user b

Both are forms of explicit feedback, which is a more reliable signal of interaction than implicit feedback, and are therefore suitable for this kind of question.

## Define first contact by upvote

In [45]:
# Find directed user-user pairs together with the time of their first contact
# by upvote. Result columns:
#   UserCommunityName_x  the voter (taken from `votes`)
#   UserCommunityName_y  the author of the posting voted on (from `postings`)
#   VoteCreatedAt        the earliest upvote from x on any posting of y
first_contact_vote_pairs = (
    # Contact is defined by an *upvote*, so only positive votes may count;
    # without this filter a downvote would also register as first contact.
    votes.loc[votes["VotePositive"] == 1,
              ["UserCommunityName", "ID_Posting", "VoteCreatedAt"]]
    # Both frames carry UserCommunityName, so pandas applies its default
    # suffixes: _x for the left (voter) side, _y for the right (author) side.
    .merge(postings[["ID_Posting", "UserCommunityName"]], on="ID_Posting", how="left")
    [["UserCommunityName_x", "UserCommunityName_y", "VoteCreatedAt"]]
    # VoteCreatedAt is still a string here (read_csv was called without date
    # parsing); this assumes the ISO-like format sorts chronologically.
    .sort_values("VoteCreatedAt")
    # groupby drops rows whose key is missing, e.g. votes on postings that
    # are not present in `postings` (left join leaves the author as NaN).
    .groupby(["UserCommunityName_x", "UserCommunityName_y"])
    .first()
    .reset_index())

first_contact_vote_pairs.head(5)

Unnamed: 0,UserCommunityName_x,UserCommunityName_y,VoteCreatedAt
0,!! Melzer = Doppelspieler !!,1816/55,2019-05-30 08:00:24.183
1,!! Melzer = Doppelspieler !!,Hasan_Vural,2019-05-03 19:36:13.510
2,!! Melzer = Doppelspieler !!,Mohrdred,2019-05-08 09:01:16.380
3,!! Melzer = Doppelspieler !!,Wissender,2019-05-30 08:05:59.733
4,!!!DerAbgrund!!!,Gewerkschaftelhuber,2019-05-30 22:00:26.557


## Define first contact by reply

In [47]:
# Find directed user-user pairs together with the time of their first contact
# by reply. Result columns:
#   UserCommunityName_x  the author of the reply
#   UserCommunityName_y  the author of the parent posting that was replied to
#   PostingCreatedAt     creation time of the earliest reply from x to y
first_contact_reply_pairs = (
    # Only postings that have a parent are replies.
    postings.dropna(subset=["ID_Posting_Parent"])
    [["UserCommunityName", "ID_Posting_Parent", "PostingCreatedAt"]]
    # Self-join: look up the parent posting to find out who was replied to.
    # Both sides carry UserCommunityName, hence the default _x / _y suffixes.
    .merge(postings[["ID_Posting", "UserCommunityName"]],
           left_on="ID_Posting_Parent", right_on="ID_Posting", how="left")
    [["UserCommunityName_x", "UserCommunityName_y", "PostingCreatedAt"]]
    # A reply to one's own posting is not contact between two users, so
    # (a, a) pairs are excluded.
    .loc[lambda pairs: pairs["UserCommunityName_x"] != pairs["UserCommunityName_y"]]
    # PostingCreatedAt is still a string here (read_csv was called without
    # date parsing); this assumes the ISO-like format sorts chronologically.
    .sort_values("PostingCreatedAt")
    # groupby drops rows whose key is missing, e.g. replies whose parent
    # posting is not present in `postings`.
    .groupby(["UserCommunityName_x", "UserCommunityName_y"])
    .first()
    .reset_index())

first_contact_reply_pairs.head(5)


Unnamed: 0,UserCommunityName_x,UserCommunityName_y,PostingCreatedAt
0,!!!DerAbgrund!!!,Katmai,2019-05-28 08:46:38.747
1,"""UnschuldsVermutung"" die Erste",Bono,2019-05-21 07:10:35.887
2,"""Unschuldsvermutung""",Cabernet Sauvignon,2019-05-10 15:20:14.960
3,"""Unschuldsvermutung""",Jimmy Hey,2019-05-10 15:21:38.460
4,"""mir fällt kein nick ein"" ist schon vergeben",ConanBarbarian,2019-05-28 13:11:37.280
