In [12]:
import pandas as pd
import re
from textblob import TextBlob


# Path to the wiki-RfA text file
file_path = '../../data/wiki-RfA.txt'

# A TXT record is a line that starts with "TXT:"; group 1 is the text after the prefix
txt_pattern = re.compile(r"^TXT:(.*)$")

# Kept TXT records, in file order
data = []

with open(file_path, 'r', encoding='utf-8') as file:
    for raw_line in file:
        match = txt_pattern.match(raw_line.strip())
        if not match:
            # Not a TXT line
            continue

        record = match.group(1).strip()
        lowered = record.lower()
        # Drop records that mention 'support' or 'oppose' (case-insensitive)
        if 'support' in lowered or 'oppose' in lowered:
            continue

        data.append(record)

# One-column DataFrame holding the kept TXT records
df_txt = pd.DataFrame(data, columns=["TXT"])

# Sentiment scoring with TextBlob
def analyze_sentiment(text):
    """Score ``text`` with TextBlob.

    Args:
        text: The text to analyse; it is passed straight to ``TextBlob``.

    Returns:
        A ``(polarity, subjectivity)`` tuple taken from the blob's
        ``sentiment`` attribute.
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity, sentiment.subjectivity

# Apply sentiment analysis and add results to the DataFrame.
# The per-record (polarity, subjectivity) tuples are collected into a frame
# instead of being unpacked with ``zip(*...)``: unpacking raises
# "ValueError: not enough values to unpack" when no TXT record survived the
# filter, whereas this form simply yields two empty float columns.
textblob_scores = pd.DataFrame(
    df_txt["TXT"].apply(analyze_sentiment).tolist(),
    columns=["Polarity", "Subjectivity"],
    index=df_txt.index,  # keep the scores aligned with their source rows
    dtype=float,
)
df_txt["Polarity"] = textblob_scores["Polarity"]
df_txt["Subjectivity"] = textblob_scores["Subjectivity"]

# Print the first 30 results
print(df_txt.head(30))

# Save the DataFrame to a CSV file (optional)
df_txt.to_csv('../../data/wiki_rfa_textblob.csv', index=False)


                                                  TXT  Polarity  Subjectivity
0         Yea, I've seen him a lot. Good candidate. —  0.700000      0.600000
1                                                      0.000000      0.000000
2   If an editor has 10,000 edits to mainspace, an... -0.125000      0.375000
3   [[WP:NETPOS]]. Reviewed the candidate eralier ...  0.350000      0.500000
4   '''Neutral''' for now. I am concerned at the s...  0.159091      0.540909
5   '''Neutral''' for now, I think that the candid...  0.600000      0.633333
6   '''Yes''' - CCI is hugely backlogged and needs...  0.400000      0.900000
7                                          {{pro}} --  0.000000      0.000000
8                                       '''Good''' --  0.700000      0.600000
9                                                      0.000000      0.000000
10  I trust your judgement. <span style="text-shad...  0.000000      0.000000
11  You gave a tremendous answer to Q9; answering ...  0.170833 

In [13]:
import pandas as pd
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Path to the wiki-RfA text file
file_path = '../../data/wiki-RfA.txt'

# Pull the text after the "TXT:" prefix out of every TXT line, in file order
with open(file_path, 'r', encoding='utf-8') as file:
    txt_matches = (re.match(r"^TXT:(.*)$", line.strip()) for line in file)
    txt_records = [found.group(1).strip() for found in txt_matches if found]

# Keep only records that mention neither 'support' nor 'oppose' (case-insensitive)
data = [
    record
    for record in txt_records
    if not ('support' in record.lower() or 'oppose' in record.lower())
]

# One-column DataFrame holding the kept TXT records
df_txt = pd.DataFrame(data, columns=["TXT"])

# VADER analyzer instance used for scoring
analyzer = SentimentIntensityAnalyzer()

def analyze_sentiment_vader(text):
    """Score ``text`` with the module-level VADER ``analyzer``.

    Args:
        text: The text passed to ``analyzer.polarity_scores``.

    Returns:
        A 4-tuple with the ``'compound'``, ``'pos'``, ``'neu'`` and ``'neg'``
        entries of the returned score mapping, in that order.
    """
    scores = analyzer.polarity_scores(text)
    return tuple(scores[key] for key in ('compound', 'pos', 'neu', 'neg'))

# Apply sentiment analysis and add results to the DataFrame.
# The per-record (compound, pos, neu, neg) tuples are collected into a frame
# instead of being unpacked with ``zip(*...)``: unpacking raises
# "ValueError: not enough values to unpack" when no TXT record survived the
# filter, whereas this form simply yields four empty float columns.
vader_scores = pd.DataFrame(
    df_txt["TXT"].apply(analyze_sentiment_vader).tolist(),
    columns=["Compound", "Positive", "Neutral", "Negative"],
    index=df_txt.index,  # keep the scores aligned with their source rows
    dtype=float,
)
for score_column in vader_scores.columns:
    df_txt[score_column] = vader_scores[score_column]

# Print the first 30 results
print(df_txt.head(30))

# Save the DataFrame to a CSV file (optional)
df_txt.to_csv('../../data/wiki_rfa_vader.csv', index=False)


                                                  TXT  Compound  Positive  \
0         Yea, I've seen him a lot. Good candidate. —    0.4404     0.266   
1                                                        0.0000     0.000   
2   If an editor has 10,000 edits to mainspace, an...   -0.5423     0.000   
3   [[WP:NETPOS]]. Reviewed the candidate eralier ...    0.2960     0.186   
4   '''Neutral''' for now. I am concerned at the s...    0.9258     0.206   
5   '''Neutral''' for now, I think that the candid...    0.0000     0.000   
6   '''Yes''' - CCI is hugely backlogged and needs...    0.7096     0.330   
7                                          {{pro}} --    0.0000     0.000   
8                                       '''Good''' --    0.4404     0.744   
9                                                        0.0000     0.000   
10  I trust your judgement. <span style="text-shad...    0.5106     0.248   
11  You gave a tremendous answer to Q9; answering ...    0.0000     0.000   