diff --git a/Sentiment_analysis.ipynb b/Sentiment_analysis.ipynb new file mode 100644 index 0000000..e2158d1 --- /dev/null +++ b/Sentiment_analysis.ipynb @@ -0,0 +1,4576 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R8xlgDHt07hl" + }, + "outputs": [], + "source": [ + "%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117\n", + "%pip install transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "bktbQ_8Sw_4x", + "outputId": "77b93666-ad40-45f0-ed25-dba9f4cd0d88", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 384 + } + }, + "outputs": [ + { + "output_type": "error", + "ename": "ModuleNotFoundError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtransformers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mAutoTokenizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mAutoModelForSequenceClassification\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mAdamW\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_selection\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfrom\u001b[0m 
\u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mconfusion_matrix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclassification_report\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'transformers'", + "", + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" + ], + "errorDetails": { + "actions": [ + { + "action": "open_url", + "actionText": "Open Examples", + "url": "/notebooks/snippets/importing_libraries.ipynb" + } + ] + } + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import re\n", + "import warnings\n", + "\n", + "import torch\n", + "from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import confusion_matrix, classification_report\n", + "from torch.utils.data import Dataset, DataLoader\n", + "from torch.nn.utils.rnn import pad_sequence\n", + "import tensorflow as tf\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n", + "\n", + "\n", + "warnings.filterwarnings('ignore')\n", + "plt.style.use('ggplot')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 423 + }, + "id": "WY9S2Tpc1Zjl", + "outputId": "397de4ab-8283-484a-d4d8-2999f398c2d0" + }, + "outputs": [ + { + 
"output_type": "execute_result", + "data": { + "text/plain": [ + " Text Language Label\n", + "0 @Charlie_Corley @Kristine1G @amyklobuchar @Sty... en litigious\n", + "1 #BadBunny: Como dos gotas de agua: Joven se di... es negative\n", + "2 https://t.co/YJNiO0p1JV Flagstar Bank disclose... en litigious\n", + "3 Rwanda is set to host the headquarters of Unit... en positive\n", + "4 OOPS. I typed her name incorrectly (today’s br... en litigious\n", + "... ... ... ...\n", + "937849 @Juice_Lemons in the dark. it’s so good en positive\n", + "937850 8.SSR & Disha Salian case should be solved... en negative\n", + "937851 *ACCIDENT: Damage Only* - Raleigh Fire Depart... en negative\n", + "937852 @reblavoie So happy for her! She’s been incred... en positive\n", + "937853 I'm lost and I'm found but en negative\n", + "\n", + "[937854 rows x 3 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TextLanguageLabel
0@Charlie_Corley @Kristine1G @amyklobuchar @Sty...enlitigious
1#BadBunny: Como dos gotas de agua: Joven se di...esnegative
2https://t.co/YJNiO0p1JV Flagstar Bank disclose...enlitigious
3Rwanda is set to host the headquarters of Unit...enpositive
4OOPS. I typed her name incorrectly (today’s br...enlitigious
............
937849@Juice_Lemons in the dark. it’s so goodenpositive
9378508.SSR & Disha Salian case should be solved...ennegative
937851*ACCIDENT: Damage Only* - Raleigh Fire Depart...ennegative
937852@reblavoie So happy for her! She’s been incred...enpositive
937853I'm lost and I'm found butennegative
\n", + "

937854 rows × 3 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')\n", + "\n", + "df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/dataset.csv')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "d_uo7WcUX74w", + "outputId": "514e1537-21dc-43f5-c6c3-80a38359fe4e" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(937854, 3)" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oHNWPI6aGfDY", + "outputId": "fa25099e-4430-43b1-e412-98caa0d919e5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['en', 'es', 'ca', 'fr', 'pt', 'ja', 'hi', 'ar', 'ko', 'nl', 'et',\n", + " 'in', 'und', 'tl', 'zh', 'de', 'cs', 'ru', 'qme', 'it', 'el', 'ro',\n", + " 'no', 'tr', 'sv', 'ta', 'fa', 'ht', 'pl', 'da', 'th', 'hu', 'lv',\n", + " 'uk', 'qht', 'eu', 'qam', 'si', 'cy', 'zxx', 'ml', 'ne', 'mr',\n", + " 'qst', 'vi', 'bn', 'gu', 'is', 'fi', 'ckb', nan, 'te', 'art', 'bg',\n", + " 'ur', 'sl', 'lt', 'pa', 'iw', 'kn', 'sr',\n", + " \"[Photo(previewUrl='https://pbs.twimg.com/media/FWV94O7UEAAjMOi?format=jpg&name=small', fullUrl='https://pbs.twimg.com/media/FWV94O7UEAAjMOi?format=jpg&name=large')]\",\n", + " \"[Photo(previewUrl='https://pbs.twimg.com/media/FWWBzWTXkAAyZqm?format=jpg&name=small', fullUrl='https://pbs.twimg.com/media/FWWBzWTXkAAyZqm?format=jpg&name=large')]\",\n", + " 'am', 'or',\n", + " \"[Photo(previewUrl='https://pbs.twimg.com/media/EPYG2rKVAAA1e_O?format=jpg&name=small', fullUrl='https://pbs.twimg.com/media/EPYG2rKVAAA1e_O?format=jpg&name=large')]\",\n", + " 'sd',\n", + " 
\"[Photo(previewUrl='https://pbs.twimg.com/media/FWXXH-AUcAAp6Wc?format=jpg&name=small', fullUrl='https://pbs.twimg.com/media/FWXXH-AUcAAp6Wc?format=jpg&name=large')]\",\n", + " \"[Photo(previewUrl='https://pbs.twimg.com/media/FWVRZ6bWYAAM8-3?format=jpg&name=small', fullUrl='https://pbs.twimg.com/media/FWVRZ6bWYAAM8-3?format=jpg&name=large')]\",\n", + " \"[Photo(previewUrl='https://pbs.twimg.com/media/FWWLs-qWQAEUKqZ?format=jpg&name=small', fullUrl='https://pbs.twimg.com/media/FWWLs-qWQAEUKqZ?format=jpg&name=large')]\",\n", + " \"[Photo(previewUrl='https://pbs.twimg.com/media/FWVhrnXX0AE0h9Z?format=jpg&name=small', fullUrl='https://pbs.twimg.com/media/FWVhrnXX0AE0h9Z?format=jpg&name=large')]\",\n", + " \"[Video(thumbnailUrl='https://pbs.twimg.com/ext_tw_video_thumb/1541709480324259840/pu/img/E_bkLKnbgX96ui7e.jpg', variants=[VideoVariant(contentType='application/x-mpegURL', url='https://video.twimg.com/ext_tw_video/1541709480324259840/pu/pl/_agIGuySTtX4_1HY.m3u8?tag=12&container=fmp4', bitrate=None), VideoVariant(contentType='video/mp4', url='https://video.twimg.com/ext_tw_video/1541709480324259840/pu/vid/1280x720/VESWY4_w8FIbtAih.mp4?tag=12', bitrate=2176000), VideoVariant(contentType='video/mp4', url='https://video.twimg.com/ext_tw_video/1541709480324259840/pu/vid/480x270/N8UYgrlJKVHmxOkv.mp4?tag=12', bitrate=256000), VideoVariant(contentType='video/mp4', url='https://video.twimg.com/ext_tw_video/1541709480324259840/pu/vid/640x360/0e6bAb2v0ZFSrU2o.mp4?tag=12', bitrate=832000)], duration=18.646, views=5)]\",\n", + " \"[Photo(previewUrl='https://pbs.twimg.com/media/FWXYTY7XEAY4sw8?format=jpg&name=small', fullUrl='https://pbs.twimg.com/media/FWXYTY7XEAY4sw8?format=jpg&name=large')]\"],\n", + " dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "df['Language'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jBs5CPkxbn25" + }, + "outputs": [], + "source": [ + "english_df = 
df[df['Language'] =='en']\n", + "\n", + "#let's drop the language column\n", + "english_df.drop(columns='Language', inplace=True)\n" + ] + }, + { + "cell_type": "code", + "source": [ + "\n", + "uncertainty_df = english_df[df['Label'] =='uncertainty'][:30000]\n", + "positive_df = english_df[df['Label'] =='positive'][:30000]\n", + "negative_df = english_df[df['Label'] =='negative'][:30000]\n", + "litigious_df = english_df[df['Label'] =='litigious'][:1200]" + ], + "metadata": { + "id": "_udrC5EG6yLH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "uncertainty_df" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 423 + }, + "id": "N_hDnrK_bbog", + "outputId": "55c9bee6-b8b3-4c31-b0ca-9a67f9d21a7e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Text Label\n", + "7 @ShawnTarloff @itsmieu you can also relate thi... uncertainty\n", + "22 ew its almost valentine’s day https://t.co/FA5... uncertainty\n", + "25 @BlackMercury3 I’m thinking that maybe Ironwoo... uncertainty\n", + "29 @zoidberg95 Infinite Diversity in Infinite Com... uncertainty\n", + "33 Sad thing is.. he's causing more damage than T... uncertainty\n", + "... ... ...\n", + "142795 @Sidelinecreepin i’m the opposite. i root for ... uncertainty\n", + "142811 @donwinslow @PressSec Maybe she could a press ... uncertainty\n", + "142814 Can't believe it's been almost 10 years since ... uncertainty\n", + "142830 @Kat_Ozburn Public school can be great. Of cou... uncertainty\n", + "142832 @viviant_g I have no idea. I just saved this f... uncertainty\n", + "\n", + "[30000 rows x 2 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TextLabel
7@ShawnTarloff @itsmieu you can also relate thi...uncertainty
22ew its almost valentine’s day https://t.co/FA5...uncertainty
25@BlackMercury3 I’m thinking that maybe Ironwoo...uncertainty
29@zoidberg95 Infinite Diversity in Infinite Com...uncertainty
33Sad thing is.. he's causing more damage than T...uncertainty
.........
142795@Sidelinecreepin i’m the opposite. i root for ...uncertainty
142811@donwinslow @PressSec Maybe she could a press ...uncertainty
142814Can't believe it's been almost 10 years since ...uncertainty
142830@Kat_Ozburn Public school can be great. Of cou...uncertainty
142832@viviant_g I have no idea. I just saved this f...uncertainty
\n", + "

30000 rows × 2 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "uncertainty_df['Text'][7]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "kxUu6XI0bkb5", + "outputId": "dd88b830-49f9-4941-8776-2c5f43d8d4a8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'@ShawnTarloff @itsmieu you can also relate this to art too!!! a lot of people are dismayed in starting art because of this kind of thing and i always try to tell them that everyone starts somewhere and those who are \"good\" by certain ages just trained enough to get there, like with anything you learn'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Feature Engineering\n", + "\n", + " - Engineer the litigious label to make it more extensive" + ], + "metadata": { + "id": "mJvQBUzQxxxV" + } + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "\n", + "new_records = []\n", + "\n", + "litigious_sentences = [\n", + " 'The plaintiff alleges that the defendant committed defamation by spreading false and damaging statements about their character.',\n", + " 'The company is facing a lawsuit for patent infringement due to their unauthorized use of a patented technology.',\n", + " 'The accused party is being sued for breach of contract, as they failed to fulfill their obligations as outlined in the signed agreement.',\n", + " 'I will bury this place in so much law suit that your grand children are going to need lawyers',\n", + " 'The plaintiff claims substantial damages resulting from the defendant\\'s fraudulent activities, including misrepresentation of financial information.',\n", + " 'The employer is facing a legal battle over allegations of wrongful termination and 
violation of labor laws, as the employee argues they were fired without just cause.',\n", + " 'The plaintiff seeks compensation for the alleged negligence of the defendant, which resulted in personal injury and financial loss.',\n", + " 'The company filed a lawsuit against its former employee for alleged misappropriation of trade secrets, claiming significant damages.',\n", + " 'The court issued an injunction to prevent the defendant from further trademark violation and ordered them to cease all unauthorized use of the protected mark.',\n", + " 'The plaintiff accuses the defendant of libel and slander, claiming that the false statements made by the defendant harmed their personal and professional reputation.',\n", + " 'The dispute between the two parties arose from an alleged breach of fiduciary duty, with the plaintiff asserting that the defendant prioritized personal gain over their legal obligations.',\n", + " 'The neighbors filed a complaint against each other for alleged trespassing, seeking a restraining order to prevent further disputes.',\n", + " 'The landlord is facing a lawsuit from the tenant, claiming breach of contract due to the landlord\\'s failure to address necessary repairs in a timely manner.',\n", + " 'A legal battle between former spouses ensued over child custody, with both parties accusing each other of parental alienation and seeking sole custody rights.',\n", + " 'The homeowners\\' association sent a cease and desist letter to a resident for violating community guidelines by conducting business activities from their home.',\n", + " 'Siblings engaged in a dispute over their late parent\\'s estate, resulting in a contentious probate case with allegations of undue influence and mismanagement of assets.'\n", + "]\n", + "\n", + "for i in range(1800):\n", + " for sentence in litigious_sentences:\n", + " text = sentence\n", + " label = 'litigious'\n", + " record = {'Text': text, 'Label': label}\n", + " new_records.append(record)\n", + "\n", + 
"\n", + "engineered_litigious_df = pd.DataFrame(new_records)\n", + "\n", + "engineered_litigious_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "XH77dtno2CUG", + "outputId": "1d8378f6-d2ff-4bc9-f066-d9098c3e5229" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Text Label\n", + "0 The plaintiff alleges that the defendant commi... litigious\n", + "1 The company is facing a lawsuit for patent inf... litigious\n", + "2 The accused party is being sued for breach of ... litigious\n", + "3 I will bury this place in so much law suit tha... litigious\n", + "4 The plaintiff claims substantial damages resul... litigious" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TextLabel
0The plaintiff alleges that the defendant commi...litigious
1The company is facing a lawsuit for patent inf...litigious
2The accused party is being sued for breach of ...litigious
3I will bury this place in so much law suit tha...litigious
4The plaintiff claims substantial damages resul...litigious
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "engineered_litigious_df.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1zyyj5Yx9jNm", + "outputId": "8b90fbf4-ff12-4f18-f351-1ae6d53e6527" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(28800, 2)" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "data = pd.concat([litigious_df, uncertainty_df, positive_df, negative_df, engineered_litigious_df], axis=0, ignore_index=True)\n", + "data" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 423 + }, + "id": "D2y2VvJ51eFO", + "outputId": "b60f1e57-a594-44a4-a7fe-7f58885e2057" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Text Label\n", + "0 @Charlie_Corley @Kristine1G @amyklobuchar @Sty... litigious\n", + "1 https://t.co/YJNiO0p1JV Flagstar Bank disclose... litigious\n", + "2 OOPS. I typed her name incorrectly (today’s br... litigious\n", + "3 @SaltBurned [When the first sign of surrender ... litigious\n", + "4 \"It's Your fight to save the UK from the liars... litigious\n", + "... ... ...\n", + "119995 The neighbors filed a complaint against each o... litigious\n", + "119996 The landlord is facing a lawsuit from the tena... litigious\n", + "119997 A legal battle between former spouses ensued o... litigious\n", + "119998 The homeowners' association sent a cease and d... litigious\n", + "119999 Siblings engaged in a dispute over their late ... litigious\n", + "\n", + "[120000 rows x 2 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TextLabel
0@Charlie_Corley @Kristine1G @amyklobuchar @Sty...litigious
1https://t.co/YJNiO0p1JV Flagstar Bank disclose...litigious
2OOPS. I typed her name incorrectly (today’s br...litigious
3@SaltBurned [When the first sign of surrender ...litigious
4\"It's Your fight to save the UK from the liars...litigious
.........
119995The neighbors filed a complaint against each o...litigious
119996The landlord is facing a lawsuit from the tena...litigious
119997A legal battle between former spouses ensued o...litigious
119998The homeowners' association sent a cease and d...litigious
119999Siblings engaged in a dispute over their late ...litigious
\n", + "

120000 rows × 2 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "81BEYRHSbzzR" + }, + "source": [ + "## EDA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PdpIDmTWr6MR", + "outputId": "9ab96080-21fa-41d7-8001-7e8650c1b0ca" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 120000 entries, 0 to 119999\n", + "Data columns (total 2 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Text 120000 non-null object\n", + " 1 Label 120000 non-null object\n", + "dtypes: object(2)\n", + "memory usage: 1.8+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TXPDU8_rb2HA", + "outputId": "e66aa909-6154-40c9-ac7d-eed82e1f13ab" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "litigious 30000\n", + "uncertainty 30000\n", + "positive 30000\n", + "negative 30000\n", + "Name: Label, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "sentiments = data['Label'].value_counts().sort_values(ascending=False)\n", + "sentiments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 546 + }, + "id": "5NLT77j4cvAB", + "outputId": "3e601057-10a3-4aec-87ab-23ebde5b2e73" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3QAAAIRCAYAAAAC3v/mAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABINUlEQVR4nO3de3zO9f/H8ee1XdtsmG0MG+YQk1MOiQ5ETpEIiUo5hb5R+H5/1Tf55lBJvqofFaUsh0L5KkpFB2edKHI+M8w2s9jY2Pn9+8Nv17erDXO89rbH/XbbjetzfF2f1/XZ9tzn5DDGGAEAAAAArOPl6QIAAAAAAJeGQAcAAAAAliLQAQAAAIClCHQAAAAAYCkCHQAAAABYikAHAAAAAJYi0AEAAACApQh0AAAAAGApAh0AAAAAWIpABwCXoG/fvnI4HIqOjvZ0KbjG5s2bp4YNG6pkyZJyOBwaPny4p0u6JDNnzpTD4dDMmTPdhlepUkVVqlS55OU6HA61bNnysmqzWXR0tBwOh/r27evpUgAUEQQ6ANeVXr16yeFwaOrUqRectl27dnI4HFq4cOE1qOzaycjIUFRUlDp27KiwsDD5+fmpZMmSatCggYYPH67Nmzd7usQCudxgcTX89NNP6tWrl06dOqUnnnhCo0ePVvv27c87T25wOt9XYXufhcl3332nrl27Kjw8XL6+vgoODlZkZKQeeOABvfnmmzLGXPOaikpoLYz7IIC8nJ4uAACupIEDB2ru3LmaPn26Bg8efM7poqOj9f333yssLEydOnW6hhVeXbt371aXLl20Y8cOlSlTRm3btlVERIQyMjK0fft2vfvuu3rzzTe1aNEide7c2dPlWuerr76SMUazZ8/W7bffflHz1q9fX126dMl3XFBQ0OUXd4UsW7bM0yW4vPLKKxo5cqScTqfat2+vmjVrytvbW/v27dOqVau0YMECDR48WE5n4fl1pkKFCtqxY4dKlSrl6VIAFBGF5zsgAFwBLVu2VGRkpDZu3KgNGzaoUaNG+U4XFRUlY4z69etXqH4ZvBxHjx5V69atFRMTo+HDh+uVV16Rv7+/2zQJCQkaO3asTpw44aEq7RYbGytJCg8Pv+h5GzRooDFjxlzhiq68G264wdMlSJIOHjyoUaNGKTAwUGvXrlW9evXcxufk5Oi7776Tt7e3hyrMn4+Pj2688UZPlwGgCOGUSwDXnYEDB0qS3n///XzHZ2dna8aMGXI4HBowYIAkadGiRXrkkUcUGRmp4sWLq3jx4rr55pv15ptvKicnp0DrXblypRwOxzl/aT/f6Uvz5s3TXXfdpaCgIBUrVky1atXSyy+/rPT09AKtW5L+9a9/KSYmRg899JD+93//N0+Yk6SyZctqypQpevDBB92Gx8XFaciQIapSpYp8fX0VGhqqbt266bfffsuzjDFjxsjhcGjlypV5xp3r+qE/X3M4bdo01atXT8WKFVO5cuU0aNAgJScnu6bN3Y4HDx7UwYMH3U5N/PNy16xZo06dOqlixYry8/NT+fLldeutt2rs2LEF3mY5OTl69913dcstt6hEiRIqXry4brnlFr3zzjtufc89bXLGjBmSpKpVq7pquhrXUeZ+VlJTU/XMM88oIiJCfn5+ql69uiZMmJDvaYbGGE2ePFm1a9dWsWLFVKFCBT355JNKTk6+qFPn8ps2IyNDb775pho1aqTg4GAFBASoSpUquu+++/T999/nu5zExEQNGjTIddpvnTp1XNuvIH755RdlZ2frrrvuyhPmJMnLy0t33323HA5HvvN2795d5cuXl6+vrypVqqTHH3/cFcj/rGXLlnI4HMrKytIrr7yiGjVqyM/PT5UqVdI///lPZWRkuKbN/RxI0qpVq9w+m7n7/YX2gQMHDujtt9929alKlSp65ZVXXD39z3/+oyZNmqh48eIqW7asnnzySZ05cybfbbRz50717dtXlSpVkq+vr8qVK
6eHH35Yu3btyjNtYd0HAVy+6+PP0gDwJ3369NHIkSM1b948vf766woICHAbv2TJEh05ckRt27ZV1apVJUnPPfecvLy81LRpU1WoUEHJyclavny5hg0bpvXr1+vDDz+8avX2799fM2bMUMWKFXX//fcrKChIP//8s1544QUtW7ZM33333QWPIp45c8ZV4+jRoy+4Tj8/P9f/Dxw4oGbNmik2NlatWrXSQw89pMOHD+s///mPvvrqK3366ae69957L+9N/r9nn31W33zzjTp16qR27dppxYoVev/997V3714tX75c0tlAMXr0aE2aNEmS3G460qBBA0nS0qVL1bFjRwUGBqpz586qUKGCjh8/rh07dmjq1KkF2gaS9Oijj2ru3LmqVKmSBgwY4LqmcvDgwVq7dq3mzJnjWu/o0aO1aNEibdq0ScOGDXOdJnm1TpfMzMzU3XffrdjYWHXo0EFOp1OLFi3Sc889p7S0tDzvcciQIXrnnXcUHh6uQYMGydfXV1988YXWrVunzMxM+fj4XHItffv21bx581S3bl317t1b/v7+io2N1dq1a7V06VK1adPGbfqkpCTdcccd8vX1Vffu3ZWenq7//Oc/6t+/v7y8vNSnT58LrrN06dKSpP379ys7O7vAR+I++OADDRo0SH5+furcubMqVaqkPXv2aPr06Vq8eLF+/vlnRURE5Jnv4Ycf1po1a9ShQwcFBgbq66+/1r///W8lJCS4gmju52Ds2LGqXLmyW7gp6DV1Tz/9tFauXOnaB7744guNHDlSGRkZCgkJ0XPPPacuXbqoefPm+u677zRlyhRlZ2frnXfecVvO0qVL1a1bN2VmZqpTp06qXr26YmJi9Nlnn+mrr77SihUr8j1DobDtgwCuAAMA16EePXoYSWbGjBl5xnXu3NlIMv/5z39cw/bu3ZtnuuzsbNO7d28jyfz8889u4/r06WMkmQMHDriGrVixwkgyo0ePzremypUrm8qVK7sNmzFjhpFkunbtak6fPu02bvTo0UaSmTRp0vnfrDFm9erVRpKpUKHCBaf9q3bt2hlJ5uWXX3Yb/sMPPxhvb28TEhJiTp06laeuFStW5FnWgQMHjCTTp08ft+G526tSpUrm4MGDruGZmZmmefPmRpL55Zdf3ObJb3vl6tatm5Fkfv/99zzjjh07doF3fNbcuXONJNOwYUO395eSkmJuvvlmI8nMmTMn3/fx575fSG6P69evb0aPHp3v15IlS9zmqVy5spFkOnTo4Pa5OHr0qClVqpQpVaqUycjIcA3P7X9kZKQ5ceKEa3h6erpr+57rs/fXfeSv2z0pKck4HA5z8803m6ysrDzvLzEx0e21JCPJPPbYY27Tb9u2zXh7e5tatWpdaJMZY872IXc7NG/e3ERFRZmtW7fmW0OuXbt2GR8fH3PDDTeYmJgYt3Hff/+98fLyMl26dHEb3qJFCyPJNGrUyPzxxx9u67/hhhuMl5eXiYuLy/MeW7RokW8NF9oHKleu7FbbiRMnTOnSpU1AQIApU6aM2b59u2tcWlqaqVWrlvH19TVHjx51DT9+/LgJCgoypUuXNtu2bXNbz5YtW0zx4sVNw4YN811/YdoHAVwZnHIJ4Lo0aNAgSdL06dPdhsfFxenrr79W2bJldd9997mG53fdkJeXl4YNGyZJ+uabb65KnZMnT5bT6dQHH3yQ5xTJF154QaVLl3YdJTqfuLg4SVLFihUvav0xMTH69ttvFRERoWeffdZt3O23366HHnpIx48f12effXZRyz2XUaNGuR0dcTqd6tevnyRp3bp1F728/E4rLVOmTIHm/eCDDyRJr776qkqUKOEaXrx4cU2YMEFS3s/P5di0aZPGjh2b79fSpUvznefNN990e4+5n9vk5GS30+pmzZolSRo5cqTbEUNfX1+NHz/+sup2OBwyxsjPz09eXnl/bcg9kvZnAQEBeuONN9yOqtWuXVt33HGHduzYoZSUlAuut3jx4vrii
y/UoEEDrVmzRo899pjq1q2rkiVLqkWLFpo6dWqeU5LfeecdZWZmavLkyapQoYLbuNatW6tz585avHixTp06lWd9EyZMUEhIiNv6e/XqpZycHP36668XrLegXnjhBbfagoKC1LlzZ50+fVpPPPGEatWq5Rrn5+ennj17KiMjQzt27HANnz17tpKSkjR27FjVrl3bbfl169bVwIEDtXHjRm3fvj3P+gvTPgjgyuCUSwDXpVatWumGG27QDz/8oB07drh+SZoxY4aysrLUt29ft1PQ/vjjD02cOFFff/219u/fr9TUVLflHTly5IrXePr0aW3atEllypRxndr0V35+fm6/yF1pGzdulCQ1b94831PyWrVqpY8++kgbN25U7969L3t9jRs3zjOsUqVKknRRN2rp1auXPvvsMzVt2lQ9e/bUXXfdpTvuuOOiAu2GDRvk5eWV76lyLVq0kLe3t2v7XAl9+vTJ88y38ylVqpSqV6+eZ3h+2yu3zmbNmuWZ/tZbb72sG/8EBgaqU6dOWrx4sRo0aKD7779fzZs3V9OmTfOczpyrRo0aCgwMPG/tfw7R53LTTTdp48aN+vXXX7VixQpt2LBBP/30k1avXq3Vq1frvffe04oVKxQcHCzp7GMlpLPXt61fvz7P8hISEpSdna3du3fr5ptvdht3pT6bF5LfenJvsvPXmiS5wl9MTIxrWO773LRpU77X7O7evVuStGPHjjyBrzDtgwCuDAIdgOtS7g1PRowYoenTp+v111+XMUZRUVFyOByuG6dIZ6/3ueWWW3TgwAE1adJEvXv3VkhIiJxOp5KSkjR58uSLujlJQZ04cULGGB07duyybyIQFhYm6eKDZ+6NEHLnP9dyk5KSLr24P8nverPcsJGdnV3g5XTr1k1ffvmlXn/9dX3wwQeaNm2apLO/EI8fP15t27a94DKSk5MVEhIiX1/ffGsqU6aMEhISClzTlXaua/Py2165fSxXrlye6b29vfM9inYxPvnkE02YMEFz5851XRtVrFgxde/eXa+99lqe9V5M7QXRuHFjtyCybt069enTx3XUM/cPIn/88YckaeLEieddXn5HCK/UZ/NC8nucQe56zjcuMzPTNSz3fZ7rxk+5rub7vBL7IIArg1MuAVy3+vXrJx8fH82ePVsZGRlavny59u/fr7vuusvtyMf06dN14MABjR49Wr/88oumTp2ql19+WWPGjFHPnj0LvL7c09GysrLyHf/XUJT7y1vDhg1ljDnv14U0btxYfn5+iomJcf11viBya4iPj893fO6pnH/+RfN87/NKBb+C6Nixo5YvX64TJ05o2bJl+vvf/65t27bp3nvvzfdUs78qVaqUjh8/7vaLcq6srCwlJibme5SpMMqt8+jRo3nGZWdnuwLApfL399eYMWO0e/duHTp0SB999JGaNWumjz76SN27d7+sZV+KJk2a6O2335Yk1408pP9+TpOTk8+7P7Vo0eKa13wl5b7PTZs2nfd9FuTmM5fjcvdBAFcGgQ7AdatcuXLq3LmzEhMTtWjRItf1ULnX1+Xau3evJOn+++/Ps4xVq1YVeH25p30dPnw4z7i9e/e63RZckkqUKKE6depo27ZtOn78eIHXkx9/f389+uijkqQXX3zxgtPnHnFs2LChJGnt2rX5BrQVK1ZIktvd8s73Pq/ktUbe3t4FOmJQvHhxtWrVSm+88Yaef/55ZWRkaMmSJRecr2HDhsrJydHq1avzjFu9erWys7PP+RzDwubPffyrn3/++Zx/ZLgUlSpVUq9evfTNN9+oevXqWrt27WUHxktRsmRJSXL7g8ett94q6ezt9K8mLy+vK3rU7mJdq/d5tfdBAFcGgQ7AdS331MrXX39dCxcuVJkyZdS1a1e3aXKfufXX56pt3Ljxom4oceONNyowMFCff/6526l6Z86c0dChQ/Od5x//+IcyMjLUv3//fI9unThxQhs2bCjQ+l9++WVVrFhRc+bM0TPPPJPvs
6sSExM1dOhQffzxx5LO3kSlbdu2io6OznMd3y+//KK5c+cqODjYbZs1adJE0n+vR8x1+PDhAoXJgipdurSOHTuW7/tYvXp1viEl9wjVua7t+rP+/ftLkkaMGKHTp0+7hp8+fVrPPfecJOmxxx67pNqvtdzrG8eNG+f2h4OMjAw9//zzl7XsY8eOacuWLXmGp6amKiUlRU6nM9/TVi/XunXrNHPmzHz7n5mZ6bpxzZ133uka/uSTT8rHx0d///vf8z1SnZGRcUVCUOnSpfP9g8a10q9fPwUFBWns2LH53sgkJycn3+dEXqyrvQ8CuDK4hg7Ada1du3aqUqWK65eeJ598Ms8vn71799bEiRM1fPhwrVixQjVq1NCePXv05Zdfqlu3bvrkk08KtC4fHx8NGzZML730kho2bKiuXbsqKytL3333ncLDw103Pviz/v3767ffftPUqVN1ww036O6771ZERISOHz+uAwcOaPXq1erXr5/efffdC66/XLlyWrZsmbp06aLXXntNs2bNUtu2bRUREeG6S97KlSuVnp6uRYsWueZ79913dccdd+iZZ57Rt99+q8aNG7ueQ+fl5aUZM2a4joZIUtOmTXXnnXdq9erVatKkiVq1aqWjR49q8eLFuvvuu6/YL7qtW7fW+vXr1b59e915553y8/NT/fr11alTJw0dOlRHjhzRHXfc4XoY+m+//ably5ercuXKeR6cnp+HH35Yn3/+uebPn686deqoS5cucjgcWrRokQ4cOKCePXuqV69eV+S9SNLvv/9+zofOSzrvuAtp0aKFBg0apPfee0916tTR/fffLx8fHy1evFilSpVSeHh4vneoLIgjR46oYcOGqlevnm666SZVqlRJJ0+e1Jdffqn4+HgNHTrU7fNxpcTGxqpfv3568skn1axZM9eDuOPi4rR06VLFx8erevXqGjVqlGueG2+8UR988IH69++vOnXqqH379oqMjFRmZqYOHTqkNWvWKDQ0VDt37rys2lq3bq2PP/5YnTp1UqNGjeTj46M777zTLVxeTaVLl9aCBQvUtWtX3XrrrWrdurXq1Kkjh8Ohw4cP66efftIff/yhtLS0y1rP1d4HAVwh1+r5CADgKS+//LLr2Vg7d+7Md5pt27aZTp06mdDQUBMQEGAaNWpk3n///Qs+U+qvzyPLyckx48ePN9WqVTM+Pj6mUqVK5plnnjGpqannfabT4sWLTceOHU1oaKjx8fEx5cqVM7fccosZOXKk2bFjx0W93/T0dDN9+nTToUMHU758eePj42NKlChh6tata5566imzefPmPPPExMSYv/3tbyYiIsL4+PiY0qVLm/vuu8+sW7cu33WcOHHCDBgwwISGhhpfX19Tp04dM23atIveXsac+/l9KSkp5m9/+5upUKGC8fb2dlvuJ598Yh588EFTvXp1U7x4cVOyZElTp04d8/zzz5uEhIQCb6vs7GwzZcoUc/PNNxt/f3/j7+9vGjVqZN5++22TnZ2dZ/rLeQ7dhb7+7HyflXM9BzA7O9u88cYbpmbNmsbX19eEhYWZwYMHm6SkJFOiRAlTv379fOu60HPoTpw4YcaOHWvuuusuEx4ebnx9fU358uVNixYtzNy5c01OTo7b/DrPM9ouZvudPHnSzJ071/Tt29fUq1fPlC5d2nh7e5vg4GBz2223mfHjx7s9P/DPNm/ebPr06WMiIiKMr6+vCQ4ONnXq1DGDBg0yy5Ytc5s29zl0+TnXNjp69Kh56KGHTNmyZY2Xl5fb5/dS9oHzPdvxXDXkrmvIkCGmevXqxs/Pz5QsWdLUrFnTPPLII2bhwoUFXr8n90EAl89hTAGutgcAAFbas2ePIiMj9eCDD2revHmeLgcAcIVxDR0AANeB+Ph45eTkuA07ffq0hg8fLkl5rh0FAFwfuIYOAIDrwKRJkzRv3jy1bNlSYWFhio+P17JlyxQTE6MOHTrogQce8HSJAICrgEAHAMB1oG3bttq0aZO+/
fZbHT9+XE6nU5GRkRo6dKiGDx8uh8Ph6RIBAFcB19ABAAAAgKW4hg4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFHe5LIROnDihrKwsT5eBqyw0NFTHjh3zdBm4Buh10UGviw56XXTQ66KjsPXa6XQqODj4wtNdg1pwkbKyspSZmenpMnAV5d4+PCsrS9xo9vpGr4sOel100Ouig14XHTb3mlMuAQAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsJTT0wUgry2pUlK6w9Nl4Go7Gff//6HX1z16XXTQ66KDXhcd9LrI6Bbm6QouDUfoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoMvH/Pnz9cwzz3i6DAAAAAA4ryIf6Hr06KF169a5DevcubNGjRrloYoAAAAAoGCcni6gMCpWrJiKFSvm6TIAAAAA4Lw8FujGjBmjiIgI+fr6atmyZXI6nWrbtq169OghSUpNTdWHH36o9evXKysrS9WqVVOfPn1UpUoV1zI+/fRTLVmyRBkZGbr99ttVsmRJ/f7775o4caIkae/evZo3b56io6OVlZWlKlWqqE+fPqpWrZokaciQIZKk1157TZIUGhqqKVOmaP78+Vq/fr0mTpyoTZs26d///rfee+89FS9e3LXuGTNm6NChQxo9erQkaefOnZo7d6727dunwMBA3XLLLXr44YcJhgAAAACuGo+ecrlq1Sr5+fnplVde0SOPPKJPP/1UmzdvliS98cYbSk5O1vPPP69XX31VVatW1UsvvaSUlBRJ0po1a/TZZ5+pV69eevXVV1WmTBl9++23bstPS0tTixYt9OKLL2rcuHEKCwvT+PHjdebMGUnS+PHjJUmDBw/We++953r9Z/Xq1VNAQIB++eUX17CcnBz9+OOPat68uSQpPj5e48aNU9OmTfXaa69p+PDh2rVrlz744IPzvv/MzEydPn3a9ZVbFwAAAIBrz+FwFJqvgvLoKZeVK1fWAw88IEkKCwvT0qVLtWXLFvn6+mrv3r2aPn26fHx8JEm9e/fW+vXr9fPPP6tNmzZaunSpWrVqpbvuukuS1L17d23atElpaWmu5detW9dtfYMGDVK/fv20fft23XzzzQoMDJQkBQQEKCgoKN8avby8dMcdd2jt2rVq1aqVJGnLli06ffq0mjZtKklatGiRmjdvro4dO7reS79+/TR69GgNGDBAvr6++S574cKFWrBgget11apVNWHChIvahgAAAACujPLly3u6hIvm0UAXERHh9jo4OFjJycmKjo5WWlqa+vfv7zY+IyND8fHxkqTY2Fi1a9fObXz16tW1detW1+ukpCR9/PHH2r59u5KTk5WTk6OMjAwlJiZeVJ3NmjXTyJEjdfz4cYWEhGjNmjVq2LCh6xTMgwcP6uDBg1qzZo3bfMYYJSQkqGLFivkut2vXrrr33ntdry8miQMAAAC4suLj42WM8XQZkiSn06nQ0NALT3cNajn3yp15V2+MUVpamoKDgzVmzJg84wMCAgq8/ClTpiglJUV9+/ZVaGiofHx8NHLkSGVlZV1UndWrV
1f58uX1448/ql27dlq/fr0GDx7sGp+WlqY2bdronnvuyTNvmTJlzrlcHx8f1xFIAAAAAJ5ljCk0ga6gCuVdLqtVq6akpCR5eXmpbNmy+U4THh6uffv2qUWLFq5h+/btc5tm165dGjBggBo1aiRJSkxM1KlTp9ym8fb2Vk5OzgVratasmdasWaOQkBA5HA7XMqWzp0oeOXLEykO0AAAAAOxVKJ9DV69ePUVGRrruMpmQkKBdu3Zp3rx5rtDWvn17LV++XCtXrlRcXJw+/fRTHTx40O20xbCwMK1evVoxMTHas2eP3nrrrTzXs5UtW1Zbt25VUlKS64Yr+WnevLkOHDighQsX6tZbb3U7snbfffdp165dioqKUnR0tOLi4rR+/XpFRUVd4S0DAAAAAP9VKI/QORwOjRgxQvPmzdPUqVN18uRJBQUFqVatWipVqpSkswHr6NGj+vDDD5WZmanbbrtNLVu21N69e13L+dvf/qb33ntP//znP1WmTBk99NBD+vDDD93W9eijj2r27NlatmyZQkJCNGXKlHxrKl++vKpXr669e/eqT58+buMqV66sMWPG6OOPP9aoUaNkjFH58uV12223XeEtAwAAAAD/5TC2nSR6Hi+99JKCgoL01FNPebqUy7I8+piS0i/uOj8AAAAAl65bzTDFxcUVmmvofHx8CnRTlEJ5ymVBpKen68svv9Thw4d15MgRzZ8/X1u2bHG7pg4AAAAArmeF8pTLgnA4HNq4caM+++wzZWZmKjw8XP/zP/+jm266ydOlAQAAAMA1YW2g8/X11QsvvODpMgAAAADAY6w95RIAAAAAijoCHQAAAABYikAHAAAAAJYi0AEAAACApQh0AAAAAGApAh0AAAAAWIpABwAAAACWItABAAAAgKUIdAAAAABgKQIdAAAAAFiKQAcAAAAAliLQAQAAAIClCHQAAAAAYCkCHQAAAABYikAHAAAAAJYi0AEAAACApQh0AAAAAGApAh0AAAAAWIpABwAAAACWcnq6AORVr7iU6Ws8XQauIofDobCwMMXFxckYen09o9dFB70uOuh10UGviw6Hw+HpEi4ZR+gAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUk5PF4C8tqRKSekOT5eBq+1k3P//h15f9+h10UGviw56XXTQ6yKjW5inK7g0HKEDAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSRS7Qbdu2TT169FBqaup5pxsyZIi++uqra1QVAAAAA
Fw8hzHGeLqIaykrK0spKSkqVaqUHA6HVq5cqZkzZ2rmzJlu0508eVJ+fn7y8/O75jUujz6mpPSsa75eAAAAoKjqVjNMcXFxKizxyMfHR6GhoRecznkNailUnE6ngoKCLjhdYGDg1S8GAAAAAC5DoQx0Y8aMUaVKlSRJq1evltPpVNu2bdWzZ085HA6lpKRo5syZ+u2335SZmanatWurX79+CgsLkyQdO3ZMUVFR2rVrl7KyshQaGqpHHnlEjRo10rZt2zR27FjNmDFD0dHRmjp1qiSpR48ekqTu3burR48eGjJkiO655x517NhRkydPVk5Ojv7+97+7aszKytLjjz+u3r17q0WLFsrJydHnn3+u77//XklJSQoPD9f999+vW2+99RpvPQAAAABFRaEMdJK0atUqtWrVSuPHj9e+ffv03nvvqUyZMmrTpo2mTp2quLg4Pfvss/L399ecOXM0fvx4vfHGG3I6nYqKilJWVpbGjh0rPz8/xcTEqFixYnnWUbNmTfXt21effPKJJk+eLEn5Tte8eXO98cYbSktLc43ftGmT0tPT1aRJE0nSokWLtGbNGg0cOFBhYWHasWOH3nrrLQUGBqp27dpXcUsBAAAAKKoKbaArXbq0+vTpI4fDofDwcB06dEhfffWV6tSpo19//VUvvfSSatasKUkaOnSonnjiCa1fv1633XabEhMT1bRpU0VEREiSypUrl+86nE6nAgIC5HA4znsaZv369eXn56d169bpzjvvlCStXbtWjRs3lr+/vzIzM7Vw4UK98MILioyMdK1z586d+u67784Z6DIzM5WZmel67XA45O/vf9HbCgAAAMDlczgcni7hohXaQFejRg23DRoZGakvv/xSMTEx8vb2Vo0aNVzjSpYsqfDwcB05ckSS1KFDB02fPl2bN29WvXr11LRpU1WuXPmSa/H29tZtt92mNWvW6M4771RaWpp+/fVXDRs2TJIUHx+v9PR0vfTSS27zZWVlqWrVqudc7sKFC7VgwQLX66pVq2rChAmXXCcAAACAS1e+fHlPl3DRCm2guxytW7dW/fr1tWHDBm3evFkLFy5U79691aFDh0teZvPmzTVmzBglJydr8+bN8vX1VYMGDSRJaWlpkqQRI0YoJCTEbT6n89ybuGvXrrr33ntdr238iwAAAABwvYiPjy80d7l0Op123+Vy7969bq/37Nmj8uXLq2LFisrOztaePXtcp1yeOnVKsbGxqlixomv6MmXKqF27dmrXrp3mzp2rZcuW5RvonE6ncnJyLlhPzZo1Vbp0af3444/6/fffdeutt7rCWsWKFeXj46PExMSLul7Ox8dHPj4+BZ4eAAAAwNVjjCk0ga6gCu2DxRMTEzVr1izFxsZq7dq1WrJkie655x6FhYWpcePGmjZtmnbu3Kno6Gi99dZbCgkJUePGjSVJM2fO1O+//66EhATt379f27ZtU4UKFfJdT2hoqNLS0rRlyxadPHlS6enp56ypWbNm+u6777R582Y1b97cNdzf31+dOnXSrFmztHLlSsXHx2v//v1asmSJVq5ceUW3CwAAAADkKrRH6O68805lZGRoxIgR8vLy0j333KM2bdpIkgYPHqyZM2fq1VdfVVZWlmrVqqURI0a4jpjl5OQoKipKx48fl7+/vxo0aKA+ffrku56aNWuqbdu2mjRpkk6dOuV6bEF+mjVrps8++0yhoaGuo4O5evbsqcDAQC1atEhHjx5V8eLFVbVqVXXt2vUKbhUAAAAA+C+HKYTHFMeMGaMqVaqob9++ni7FI5ZHH1NSepanywAAAACKjG41wxQXF1doTrn08fEp0DV0hfaUSwAAAADA+RHoAAAAAMBShfIaujFjxni6BAAAAAAo9DhCBwAAAACWItABAAAAgKUIdAAAAABgKQIdAAAAAFiKQAcAAAAAliLQAQAAAIClCHQAAAAAYCkCHQAAAABYikAHAAAAAJYi0AEAAACApQh0AAAAAGApAh0AA
AAAWIpABwAAAACWItABAAAAgKUIdAAAAABgKQIdAAAAAFiKQAcAAAAAliLQAQAAAIClCHQAAAAAYCmnpwtAXvWKS5m+xtNl4CpyOBwKCwtTXFycjKHX1zN6XXTQ66KDXhcd9LrocDgcni7hknGEDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACzl9HQByGtLqpSU7vB0GbjaTsb9/3/o9XWPXhcd9LrooNdFB70uMrqFebqCS8MROgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLFelAt23bNvXo0UOpqameLgUAAAAALtp1EehWrlypvn37XvR8NWvW1HvvvaeAgIACzzNlyhT9+9//vuh1AQAAAMCV5vR0AZcrKyvrkud1Op0KCgq6csUAAAAAwDXkMMaYgk48ZMgQ3XPPPerYsaNr2DPPPKNbbrlFPXr0UI8ePfT4449rw4YN2rRpk0JCQtS7d281btzYNf3hw4c1Z84c7dixQ8YYValSRYMHD1b58uUlScuWLdOXX36phIQEhYaGqkOHDrr77rslSQkJCXryySc1fPhwffPNN9q7d68GDhyoqVOnutXZvXt39ejRQ6tXr9bXX3+t2NhY+fn5qW7duurbt69KlSol6ewpl2PHjtWMGTNUvHhxrVy5UjNnztTw4cM1a9YsJSYm6sYbb9TgwYMVHBys+fPna8GCBW7rGj16tBYsWKCKFSvqsccecw0/efKkHn/8cT3//POqV69eQTexJGl59DElpV96UAUAAABwcbrVDFNcXJwuIh5dVT4+PgoNDb3gdFf8CN2CBQvUq1cvPfroo1qyZInefPNNTZ06VSVKlNDx48c1evRo1a5dW6NGjZK/v7927dqlnJwcSdKaNWs0f/589e/fX1WrVtWBAwc0bdo0+fn5qWXLlq51zJkzR71791bVqlXlcDjUt29fffLJJ5o8ebIkqVixYpLOHr3r2bOnwsPDlZycrNmzZ2vq1KkaMWLEOetPT0/X4sWL9eSTT8rhcOitt97Shx9+qKFDh6pz5846cuSIzpw5o8GDB0uSSpQoodatWysqKkq9e/eWj4+PJGn16tUKCQlR3bp1z7muzMxMZWZmul47HA75+/tf2oYHAAAAcFkcDoenS7hoVzzQtWjRQs2aNZMkPfTQQ1qyZIn27t2rBg0aaOnSpQoICNDw4cPldJ5ddXh4uGve+fPn69FHH1XTpk0lSWXLllVMTIy+//57t0DXsWNH1zSSF
BAQIIfDkef0yVatWrn+X65cOfXr108jRoxQWlqaK/T9VXZ2tgYOHOg6Yti+fXvXUblixYrJ19dXmZmZbutq0qSJoqKitH79et1+++2SpFWrVqlly5bn/VAsXLjQ7Yhf1apVNWHChHNODwAAAODqyc0ANrniga5y5cqu/xcrVkz+/v5KTk6WJB08eFA33nijK8z9WVpamo4ePap3331X06ZNcw3PycnJc9OSatWqFaiW/fv3a/78+Tp48KBSU1Ndh08TExNVsWLFfOfx8/Nza2RwcLBOnjx53vX4+vrqzjvv1IoVK3T77bdr//79OnTokJ599tnzzte1a1fde++9rtc2/kUAAAAAuF7Ex8cXmlMunU7nlT/l0uFw5HmD2dnZbq+9vb3POU/u6Yj5SUtLkyQ9/vjjqlGjhts4Ly/3m3Ge6+jaX5c3btw41a9fX0OHDlVgYKASExM1bty4895I5a/1SypQU1u3bq1nnnlGf/zxh1auXKm6detesAE+Pj7n3SYAAAAArh1jTKEJdAV1UYEuMDBQSUlJrtenT59WQkJCgeevXLmyVq1apaysrDxH6YKCghQcHKyjR4+qefPmF1OWnE6n6zq8XLGxsTp16pQefvhhlSlTRpK0b9++i1puQdclSREREbrhhhu0bNkyrV27Vv3797/sdQEAAADA+VzUc+jq1q2r1atXa8eOHTp06JCmTJmS5+jZ+bRv315nzpzRpEmTtG/fPsXFxWn16tWKjY2VJPXo0UOLFi1y3Zny0KFDWrFihb788svzLjc0NFRpaWnasmWLTp48qfT0dJUpU0ZOp1NLly7V0aNH9euvv+rTTz+9mLd7znUdOnRIsbGxOnnypNvRvlatWmnRokUyxqhJkyaXvS4AAAAAOJ+LOkLXpUsXJSQk6NVXX1VAQIB69ux5UUfoSpYsqVGjRumjjz7SmDFj5OXlpSpVqqhmzZqSzp626Ofnpy+++EIfffSR/Pz8FBER4faYhPzUrFlTbdu21aRJk3Tq1CnXYwsGDx6sefPmacmSJapataoeffTRy34oeJs2bbR9+3Y999xzSktL0+jRo1WnTh1JUrNmzTRr1izdcccd8vX1vaz1AAAAAMCFXNRz6HB+CQkJeuqppzR+/PgC37glPzyHDgAAALi2eA5dEZaVlaWUlBR9/PHHioyMvKwwBwAAAAAFdVHX0CF/u3bt0qBBg7Rv3z4NHDjQ0+UAAAAAKCI4QncF1KlTR/Pnz/d0GQAAAACKGI7QAQAAAIClCHQAAAAAYCkCHQAAAABYikAHAAAAAJYi0AEAAACApQh0AAAAAGApAh0AAAAAWIpABwAAAACWItABAAAAgKUIdAAAAABgKQIdAAAAAFiKQAcAAAAAliLQAQAAAIClCHQAAAAAYCkCHQAAAABYikAHAAAAAJYi0AEAAACApQh0AAAAAGApAh0AAAAAWMrp6QKQV73iUqav8XQZuIocDofCwsIUFxcnY+j19YxeFx30uuig10UHvS46HA6Hp0u4ZByhAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6A
AAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEs5PV0A8tqSKiWlOzxdBq62k3H//x96fd2j10UHvS466HXRQa+LjG5hnq7g0nCEDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxXKQDdmzBjNnDlTkjRkyBB99dVX551+/vz5euaZZy55HQAAAABgI6enC7iQ8ePHy8/Pz/W6R48eevrpp9WkSRPXsM6dO6tDhw4Xtdynn35a3t7eV6xOAAAAALjWCn2gCwwMvOA0xYoVU7FixS5quSVKlLjUkgAAAACgUCj0gW7IkCG655571LFjRw0ZMkSS9Nprr0mSQkNDNWXKFM2fP1/r16/XxIkTJUnZ2dmaNWuWVq9eLS8vL7Vq1UpJSUk6ffq0nn32WUlnT7msUqWK+vbtK0lKSUnRzJkz9dtvvykzM1O1a9dWv379FBYWJkl51iFJX331lb7++mtNmTJFkrRt2zZ99NFHiomJkbe3typVqqShQ4cqNDT0mmwrAAAAAEVLoQ90fzZ+/HgNGDBAgwcPVoMGDeTllf8lgJ9//rnWrl2rwYMHq0KFCvr666+1fv161alT55zLnjp1quLi4vTss8/K399fc+bM0fjx4/XGG2/I6bzwZsrOztbEiRPVunVrDRs2TFlZWdq7d68cDsclv18AAAAAOB+rAl3u6ZcBAQEKCgo653RLlixRly5dXNfZPfbYY9q4ceM5p4+Li9Ovv/6ql156STVr1pQkDR06VE888YTWr1+v22677YK1nTlzRqdPn9bNN9+s8uXLS5IqVqx43nkyMzOVmZnpeu1wOOTv73/BdQEAAAC48mw8GGNVoCuI06dPKzk5WdWrV3cN8/LyUrVq1ZSTk5PvPEeOHJG3t7dq1KjhGlayZEmFh4fryJEjBVpviRIl1LJlS40bN0716tXTTTfdpNtuu03BwcHnnGfhwoVasGCB63XVqlU1YcKEAq0PAAAAwJWVe2DGJtddoLta8ju9Mzs72+314MGD1aFDB/3+++/68ccf9fHHH+tf//qXIiMj811m165dde+997pe2/gXAQAAAOB6ER8fL2OMp8uQJDmdzgLdi6NQPofufLy9vc95pE06ezpmqVKltG/fPtewnJwcHThw4JzzVKhQQdnZ2dqzZ49r2KlTpxQbG+s6bTIwMFBJSUluDY6Ojs6zrKpVq6pr1656+eWXValSJa1du/ac6/Xx8VFAQIDri9MtAQAAAM8xxhSar4KyLtCVLVtWW7duVVJSklJSUvKdpkOHDlq0aJHWr1+v2NhYzZgxQykpKec8AhYWFqbGjRtr2rRp2rlzp6Kjo/XWW28pJCREjRs3liTVrl1bJ0+e1Oeff674+HgtXbrU7bq8hIQEzZ07V7t379axY8e0adMmxcfHX/A6OgAAAAC4VNadcvnoo49q9uzZWrZsmUJCQlyPDPiz++67T0lJSXr77bfl5eWlNm3aqH79+ue8K6Z09nTJmTNn6tVXX1VWVpZq1aqlESNGuO5wWbFiRT322GNauHChPv30UzVt2lSdOnXSsmXLJEm+vr46cuSIVq1apVOnTik4OFh333232rRpc3U2BAAAAIAiz2EKy0miV1FOTo7+/ve/67bbbtODDz7o6XIuaHn0MSWlZ3m6DAAAAKDI6FYzTHFxcYXmGjofH58CXUNn3RG6gsg95bF27drKysrS0qVLlZCQoGbNmnm6NAAAAAC4Yq7LQOdwOLRq1
Sp9+OGHkqRKlSrphRde4Ho2AAAAANeV6zLQlSlTRi+99JKnywAAAACAq8q6u1wCAAAAAM4i0AEAAACApQh0AAAAAGApAh0AAAAAWIpABwAAAACWItABAAAAgKUIdAAAAABgKQIdAAAAAFiKQAcAAAAAliLQAQAAAIClCHQAAAAAYCkCHQAAAABYikAHAAAAAJYi0AEAAACApQh0AAAAAGApAh0AAAAAWIpABwAAAACWItABAAAAgKUIdAAAAABgKaenC0Be9YpLmb7G02XgKnI4HAoLC1NcXJyModfXM3pddNDrooNeFx30uuhwOByeLuGScYQOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSBDoAAAAAsBSBDgAAAAAsRaADAAAAAEsR6AAAAADAUgQ6AAAAALAUgQ4AAAAALEWgAwAAAABLEegAAAAAwFIEOgAAAACwFIEOAAAAACxFoAMAAAAASxHoAAAAAMBSTk8XgLycTtpSVNDrooNeFx30uuig10UHvS46ClOvC1qLwxhjrnItKKDMzEz5+Ph4ugwAAAAAluCUy0IkMzNTkydP1pkzZzxdCq6yM2fO6J///Ce9LgLoddFBr4sOel100Ouiw+ZeE+gKmR9++EEcNL3+GWN04MABel0E0Ouig14XHfS66KDXRYfNvSbQAQAAAIClCHQAAAAAYCkCXSHi4+Oj7t27c2OUIoBeFx30uuig10UHvS466HXRYXOvucslAAAAAFiKI3QAAAAAYCkCHQAAAABYikAHAAAAAJYi0AEAAACApZyeLgBnLV26VIsXL1ZSUpIqV66s/v37q3r16p4uC+cwf/58LViwwG1YeHi4Jk2aJEnKyMjQ7Nmz9eOPPyozM1P169fXgAEDFBQU5Jo+MTFR77//vrZt26ZixYqpRYsWevjhh+Xt7e2aZtu2bZo9e7YOHz6s0qVL6/7771fLli2vwTssurZv364vvvhCBw4c0IkTJ/T000+rSZMmrvHGGM2fP1/Lli1TamqqbrzxRg0YMEBhYWGuaVJSUvTBBx/ot99+k8PhUNOmTdWvXz8VK1bMNc3BgwcVFRWlffv2KTAwUO3bt9d9993nVstPP/2kTz75RMeOHVP58uXVq1cvNWrU6OpvhCLiQr2eMmWKVq1a5TZP/fr1NXLkSNdrem2HhQsXat26dTpy5Ih8fX0VGRmpRx55ROHh4a5pruX3bX7mXz0F6fWYMWO0fft2t/natGmjQYMGuV7T68Lv22+/1bfffqtjx45JkipWrKju3burYcOGkorYPm3gcT/88IN56KGHzPLly83hw4fNu+++a/r27WuSkpI8XRrO4ZNPPjH/+Mc/zIkTJ1xfycnJrvHvvfee+dvf/ma2bNli9u3bZ55//nnzr3/9yzU+Ozvb/OMf/zAvvviiOXDggNmwYYPp37+/mTNnjmuao0ePmkceecTMmjXLHD582CxZssT07NnTbNy48Vq+1SJnw4YNZt68eeaXX34xDzzwgPnll1/cxi9cuND06dPHrFu3zkRHR5sJEyaYIUOGmPT0dNc048aNM08//bTZvXu32bFjh3nqqafMpEmTXONTU1PNgAEDzOTJk82hQ4fM2rVrTa9evcx3333nmmbnzp2mZ8+e5vPPPzeHDx828+bNMw8++KA5ePDg1d8IRcSFev3222+bcePGue3np06dcpuGXtvh5ZdfNitWrDCHDh0yBw4cMK+88op54oknzJkzZ1zTXKvv2/zMv7oK0uvRo0ebd999123fTk1NdY2n13ZYv369+e2330xsbKw5cuSImTt3rnnwwQfNoUOHjDFFa58m0BUCI0aMMNOnT3e9zs7ONoMGDTILFy70XFE4r08++cQ8/fTT+Y5LTU01D
z74oPnpp59cw2JiYswDDzxgdu3aZYw5+4tkjx49zIkTJ1zTfPPNN6Z3794mMzPTGGPMhx9+aP7xj3+4Lft///d/zcsvv3yF3w3O5a+/5Ofk5JiBAweazz//3DUsNTXVPPzww2bt2rXGGGMOHz5sHnjgAbN3717XNBs3bjQ9evQwf/zxhzHmbK/79u3r6rUxxnz00Udm2LBhrtdvvPGGGT9+vFs9zz//vJk2bdoVfY8461yBbsKECeech17bKzk52TzwwANm27Ztxphr+32bn/nX1l97bczZQDdjxoxzzkOv7dW3b1+zbNmyIrdPcw2dh2VlZWn//v2qV6+ea5iXl5fq1aun3bt3e7AyXEh8fLwef/xxPfnkk3rzzTeVmJgoSdq/f7+ys7PdelqhQgWVKVPG1dPdu3crIiLC7bB/gwYNdObMGR0+fFiStGfPHrdlSGdP9+Jz4TkJCQlKSkrSTTfd5BoWEBCg6tWru/W2ePHiuuGGG1zT1KtXTw6HQ3v37nVNU6tWLTmd/z3rvX79+oqNjVVKSoprmvz6v2fPnqv2/pDX9u3bNWDAAA0bNkzvv/++Tp065RpHr+11+vRpSVKJEiUkXbvv2/zMv/b+2utca9as0WOPPab/+Z//0dy5c5Wenu4aR6/tk5OTox9++EHp6emKjIwscvs019B52MmTJ5WTk+P2YZKkoKAgxcbGeqYoXFCNGjU0ePBghYeH68SJE1qwYIFGjRql119/XUlJSXI6nSpevLjbPKVKlVJSUpIkKSkpKU/PS5Uq5RqX+2/usD9Pc+bMGWVkZMjX1/eqvDecW25v8uvLn/sWGBjoNt7b21slSpRwm6Zs2bJu0+R+HpKSklzTnm89uPoaNGigpk2bqmzZsoqPj9e8efP0yiuvaNy4cfLy8qLXlsrJydHMmTNVs2ZNRURESNI1+76dkpLCz/xrKL9eS1KzZs1UpkwZhYSE6ODBg5ozZ45iY2P19NNPS6LXNjl06JBGjhypzMxMFStWTE8//bQqVqyo6OjoIrVPE+iAS5B7wa0kVa5c2RXwfvrpJ4IWcJ244447XP+PiIhQ5cqV9dRTT2nbtm15/mILe0RFRenw4cN68cUXPV0KrrJz9bpNmzau/0dERCg4OFgvvvii4uPjVb58+WtdJi5DeHi4Jk6cqNOnT+vnn3/WlClTNHbsWE+Xdc1xyqWHBQYGuv7S+2f5/dUAhVfx4sUVHh6u+Ph4BQUFKSsrS6mpqW7TJCcnu3oaFBSUp+fJycmucbn/5g778zT+/v6ERg/J7U1+fflz306ePOk2Pjs7WykpKeftf+7rC/Wf7wueU65cOZUsWVLx8fGS6LWNoqKitGHDBo0ePVqlS5d2Db9W37f5mX/tnKvX+cm9G+Gf9216bQen06ny5curWrVqevjhh1WlShV9/fXXRW6fJtB5mNPpVLVq1bR161bXsJycHG3dulWRkZEerAwXIy0tzRXmqlWrJm9vb23ZssU1PjY2VomJia6eRkZG6tChQ27fJDZv3ix/f39VrFhR0tnTOv+8jNxp+Fx4TtmyZRUUFOTWl9OnT2vv3r1uvU1NTdX+/ftd02zdulXGGNcvDZGRkdqxY4eysrJc02zevFnh4eGu6zwiIyPz7X+NGjWu2vvD+f3xxx9KSUlRcHCwJHptE2OMoqKitG7dOo0aNSrPabDX6vs2P/Ovvgv1Oj/R0dGS5LZv02s75eTkKDMzs8jt0wS6QuDee+/VsmXLtHLlSsXExGj69OlKT0/neWOF2OzZs7V9+3YlJCRo165dmjhxory8vNSsWTMFBASoVatWmj17trZu3ar9+/dr6tSpioyMdO3c9evXV8WKFfX2228rOjpav//+uz7++GPdfffd8vHxkSS1a9dOCQkJ+uijj3TkyBF98803+umnn9SxY0dPvvXrXlpamqKjo10/4BMSEhQdHa3ExEQ5HA7dc889+uyzz/Trr7/q0KFDevvttxUcHKxbbrlF0tnn4
DRo0EDTpk3T3r17tXPnTn3wwQe6/fbbFRISIuns9RtOp1PvvvuuDh8+rB9//FFLlizRvffe66rjnnvu0aZNm7R48WIdOXJE8+fP1759+9S+fftrvk2uV+frdVpamj788EPt3r1bCQkJ2rJli/7973+rfPnyql+/viR6bZOoqCitWbNGw4YNk7+/v5KSkpSUlKSMjAxJuqbft/mZf3VdqNfx8fFasGCB9u/fr4SEBP3666+aMmWKatWqpcqVK0ui17aYO3eu63exQ4cOuV43b968yO3TDmOMuWZrwzktXbpUX3zxhZKSklSlShX169ePv84WYpMmTdKOHTt06tQpBQYG6sYbb9SDDz7oOvc+92GWP/zwg7KysvJ9mOWxY8c0ffp0bdu2TX5+fmrRooV69eqV52GWs2bNUkxMDA8Wv0a2bduW7/n3LVq00JAhQ1wPFv/+++91+vRp3XjjjXrsscfcHlqbkpKiqKgot4dN9+/f/5wPmy5ZsqTat2+vLl26uK3zp59+0scff6xjx44pLCyMh01fYefr9cCBAzVx4kQdOHBAqampCgkJ0U033aSePXu67cf02g49evTId/jgwYNd31Ov5fdtfuZfPRfqdWJiot566y0dPnxY6enpKl26tJo0aaJu3bopICDANT29Lvzeeecdbd26VSdOnFBAQIAqV66s++67z3Un6qK0TxPoAAAAAMBSnHIJAAAAAJYi0AEAAACApQh0AAAAAGApAh0AAAAAWIpABwAAAACWItABAAAAgKUIdAAAAABgKQIdAAAAAFiKQAcAAAAAliLQAQAAAIClCHQAAAAAYCkCHQAAAABY6v8ANm7bPhy7ULwAAAAASUVORK5CYII=\n" + }, + "metadata": {} + } + ], + "source": [ + "sentiments.plot(kind='barh', color='lightblue', figsize=(10,6))\n", + "plt.title('Value Counts of English Sentiments')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ajvZ8N_Vc0d2" + }, + "source": [ + "### Data Cleaning\n", + "\n", + "1. Remove usernames from tweets\n", + "2. Remove hashtags from tweets\n", + "3. Remove things like http links and \\n form tweet" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ITTsFZP8c5gn" + }, + "source": [ + "##### Let's start with usernames\n", + "\n", + "- We need to remove the usernames that follow the @ symbol. 
The symbol will be removed as well\n", + "- Write a function using the re module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "id": "q9J8kWnfc1LQ", + "outputId": "0fd6aabe-609c-46dc-bccb-3ba6c4f0bac5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'@Charlie_Corley @Kristine1G @amyklobuchar @StyleWriterNYC testimony is NOT evidence in a court of law, state or federal. Must stand up to cross examination'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "# before\n", + "data['Text'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 423 + }, + "id": "QOa6vjUWdBDI", + "outputId": "b70a9368-8c8d-4085-f69e-2d26c7abc3c9" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Text Label\n", + "0 testimony is NOT evidence in a court of law, s... litigious\n", + "1 https://t.co/YJNiO0p1JV Flagstar Bank disclose... litigious\n", + "2 OOPS. I typed her name incorrectly (today’s br... litigious\n", + "3 [When the first sign of surrender comes, Micha... litigious\n", + "4 \"It's Your fight to save the UK from the liars... litigious\n", + "... ... ...\n", + "119995 The neighbors filed a complaint against each o... litigious\n", + "119996 The landlord is facing a lawsuit from the tena... litigious\n", + "119997 A legal battle between former spouses ensued o... litigious\n", + "119998 The homeowners' association sent a cease and d... litigious\n", + "119999 Siblings engaged in a dispute over their late ... litigious\n", + "\n", + "[120000 rows x 2 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TextLabel
0testimony is NOT evidence in a court of law, s...litigious
1https://t.co/YJNiO0p1JV Flagstar Bank disclose...litigious
2OOPS. I typed her name incorrectly (today’s br...litigious
3[When the first sign of surrender comes, Micha...litigious
4\"It's Your fight to save the UK from the liars...litigious
.........
119995The neighbors filed a complaint against each o...litigious
119996The landlord is facing a lawsuit from the tena...litigious
119997A legal battle between former spouses ensued o...litigious
119998The homeowners' association sent a cease and d...litigious
119999Siblings engaged in a dispute over their late ...litigious
\n", + "

120000 rows × 2 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "def remove_usernames(text):\n", + " pattern = r'@\\w+\\s?'\n", + " result = re.sub(pattern, '', text)\n", + " return result\n", + "\n", + "\n", + "data['Text'] = data['Text'].apply(remove_usernames)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "XUxKgGXydJNX", + "outputId": "1c9cfb83-c1dc-46b5-8d48-65800f10d19b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'testimony is NOT evidence in a court of law, state or federal. Must stand up to cross examination'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "# after\n", + "data['Text'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "TfF-WwgOfjA3", + "outputId": "fdf676a5-55c0-4742-b6be-7ec32c2942ce" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'litigious'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 20 + } + ], + "source": [ + "# corresponding label\n", + "data['Label'][0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xz53q5dqdaLx" + }, + "source": [ + "##### Next, hashtags\n", + "\n", + "- We need to remove the hashtags that follow the # symbol.\n", + "- Write a function using the re module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "syVSZa51dRft", + "outputId": "2a8d2e94-8453-47f1-a378-d8d29590fb71" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { 
+ "text/plain": [ + "'The plaintiff alleges that the defendant committed defamation by spreading false and damaging statements about their character.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 21 + } + ], + "source": [ + "# before\n", + "data['Text'][100000]" + ] + }, + { + "cell_type": "code", + "source": [ + "data['Label'][100000]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "k_j339ROP8CY", + "outputId": "69c3aec6-0561-4afa-a0fc-7a2eaea1214a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'litigious'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 423 + }, + "id": "qFznFAwOdd3a", + "outputId": "70610046-fc2b-402c-c94c-397ecf3b6f19" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Text Label\n", + "0 testimony is NOT evidence in a court of law, s... litigious\n", + "1 https://t.co/YJNiO0p1JV Flagstar Bank disclose... litigious\n", + "2 OOPS. I typed her name incorrectly (today’s br... litigious\n", + "3 [When the first sign of surrender comes, Micha... litigious\n", + "4 \"It's Your fight to save the UK from the liars... litigious\n", + "... ... ...\n", + "119995 The neighbors filed a complaint against each o... litigious\n", + "119996 The landlord is facing a lawsuit from the tena... litigious\n", + "119997 A legal battle between former spouses ensued o... litigious\n", + "119998 The homeowners' association sent a cease and d... litigious\n", + "119999 Siblings engaged in a dispute over their late ... 
litigious\n", + "\n", + "[120000 rows x 2 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TextLabel
0testimony is NOT evidence in a court of law, s...litigious
1https://t.co/YJNiO0p1JV Flagstar Bank disclose...litigious
2OOPS. I typed her name incorrectly (today’s br...litigious
3[When the first sign of surrender comes, Micha...litigious
4\"It's Your fight to save the UK from the liars...litigious
.........
119995The neighbors filed a complaint against each o...litigious
119996The landlord is facing a lawsuit from the tena...litigious
119997A legal battle between former spouses ensued o...litigious
119998The homeowners' association sent a cease and d...litigious
119999Siblings engaged in a dispute over their late ...litigious
\n", + "

120000 rows × 2 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "def remove_hastags(text):\n", + " pattern = r'#\\w+\\s?'\n", + " result = re.sub(pattern, '', text)\n", + " return result\n", + "\n", + "\n", + "data['Text'] = data['Text'].apply(remove_hastags)\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TTQ4AqTgdvQh" + }, + "source": [ + "#### Next, hashtags\n", + "\n", + "- We need to remove the hashtags that follow the # symbol.\n", + "- Write a function using the re module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "id": "1N-D85aWdnIg", + "outputId": "a8258e8c-5c1a-4750-94fa-dc12a9fc9ad4" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'[When the first sign of surrender comes, Michael’s smile splits his face like a sharp knife through flesh. It would chill Dean to his very core if he were to witness this ice cold existence and its nature, and the Archangel would have an enjoyable time »'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "# after: there are no more hashtags\n", + "data['Text'][3]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QCEWtqbQguUa" + }, + "source": [ + "##### Next, http links\n", + "\n", + "- We need to remove all http links.\n", + "- Write a function using the re module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "id": "6jaRTxhCgrHi", + "outputId": "c4a691d2-96b5-4297-95f4-4eb065856397" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'[When the first sign of surrender comes, Michael’s smile splits his face like a sharp knife through flesh. 
It would chill Dean to his very core if he were to witness this ice cold existence and its nature, and the Archangel would have an enjoyable time »'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 25 + } + ], + "source": [ + "#before\n", + "data['Text'][3]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KAVY8qRngyEq" + }, + "outputs": [], + "source": [ + "def remove_urls(string):\n", + " pattern = r'https?://\\S+'\n", + " result = re.sub(pattern, '', string)\n", + " return result\n", + "\n", + "data['Text'] = data['Text'].apply(remove_urls)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "Sw3J5WwLg_BQ", + "outputId": "ceeb04a8-ae9b-4c17-ff6b-4d230ff0d7ae" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'a family issue put her in witness protection as a child.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 27 + } + ], + "source": [ + "#after\n", + "data['Text'][203]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-SQ0K4VVhDjK" + }, + "source": [ + "\n", + "##### Remove special characters\n", + "\n", + "- We need to remove * and newline characters (\\n).\n", + "- Write a function using the re module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Rc6ln_r_hEHd", + "outputId": "da376b6d-c478-4fb2-a054-ff23ce34d22c" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['testimony is NOT evidence in a court of law, state or federal. 
Must stand up to cross examination',\n", + " ' Flagstar Bank discloses a data breach that impacted 1.5\\nMillion individuals ',\n", + " 'OOPS. I typed her name incorrectly (today’s brave witness) . 6. 7. (probaby) . Don’t forget requesting a . ',\n", + " ...,\n", + " 'A legal battle between former spouses ensued over child custody, with both parties accusing each other of parental alienation and seeking sole custody rights.',\n", + " \"The homeowners' association sent a cease and desist letter to a resident for violating community guidelines by conducting business activities from their home.\",\n", + " \"Siblings engaged in a dispute over their late parent's estate, resulting in a contentious probate case with allegations of undue influence and mismanagement of assets.\"],\n", + " dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "source": [ + "#before\n", + "data['Text'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jecglMH2hO0p", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "outputId": "fe5c4057-9ffa-4485-9c2c-90ca71bda696" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Text Label\n", + "0 testimony is NOT evidence in a court of law, s... litigious\n", + "1 Flagstar Bank discloses a data breach that im... litigious\n", + "2 OOPS. I typed her name incorrectly (today’s br... litigious\n", + "3 [When the first sign of surrender comes, Micha... litigious\n", + "4 \"It's Your fight to save the UK from the liars... litigious" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TextLabel
0testimony is NOT evidence in a court of law, s...litigious
1Flagstar Bank discloses a data breach that im...litigious
2OOPS. I typed her name incorrectly (today’s br...litigious
3[When the first sign of surrender comes, Micha...litigious
4\"It's Your fight to save the UK from the liars...litigious
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 29 + } + ], + "source": [ + "def remove_special_chars(text):\n", + " pattern = r'[*\\n]'\n", + " cleaned_text = re.sub(pattern, '', text)\n", + " return cleaned_text\n", + "\n", + "data['Text'] = data['Text'].apply(remove_special_chars)\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Let's shuffle the dataset because of the engineering we did" + ], + "metadata": { + "id": "9nrCKECVzTc1" + } + }, + { + "cell_type": "code", + "source": [ + "data = data.sample(frac=1, random_state=42) # Frac=1 ensures all rows are included, random_state for reproducibility\n", + "\n", + "data.head()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "DzoXi7zLzOfE", + "outputId": "5898c6a0-43e8-422e-a8a1-404e140b4141" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Text Label\n", + "71787 I wish i had a ps5, still on the ps4 and can’t... negative\n", + "67218 Our air conditioner at my job is broken again 😭 negative\n", + "54066 Wait you don't think a true Trumper is easier ... positive\n", + "7168 ‘Too much risk’: why Erasmus students are shun... uncertainty\n", + "29618 Maybe they should try eating Lean Kwehsine. uncertainty" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TextLabel
71787I wish i had a ps5, still on the ps4 and can’t...negative
67218Our air conditioner at my job is broken again 😭negative
54066Wait you don't think a true Trumper is easier ...positive
7168‘Too much risk’: why Erasmus students are shun...uncertainty
29618Maybe they should try eating Lean Kwehsine.uncertainty
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C0aJWWkR6vPs" + }, + "source": [ + "# Let's Train a Bert Model with a quarter million Tweets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rkswR818hYyB" + }, + "source": [ + "#### Creating Tokenizer and Model\n", + "\n", + "Let's convert our text and label column into list for easy splitting\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vfmwX-BPhdY0" + }, + "outputs": [], + "source": [ + "X = data['Text'].to_list()\n", + "y = data['Label'].to_list()\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "label_encoder = LabelEncoder()\n", + "train_labels = label_encoder.fit_transform(y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hB4fe-dW9fus", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 145, + "referenced_widgets": [ + "402f8d55db9f4f87a0db4c5da2f0a042", + "b5cd0e17acbd400b91b240eea04ec270", + "e6de111c981b4bb79a768201c40aed81", + "a0b6d47cf42a461db0cecb4a64da160d", + "4aa2aef51b534fe08ffeed1ac7f2f894", + "997cd555fa7541bd9ea358dcb6d5a623", + "0658506d9ea849f6b1757c2beeec1374", + "534846a2e8674f2fa5ee12d7cd0cb864", + "e4c1d65d4f4b4ee28593459b5e08025f", + "f76cdc6af41a4f57962585fc831d4575", + "8afb6933d3b844998f497df85e677a71", + "cdaa91c4f234424cb86145eecc5d873e", + "3610d4ce76444049be7cb23bdf89ed6d", + "c350f8e508c9489fa085e5be96a06213", + "56e6ea4d36a349229f1e21e64fc24891", + "2e55173b72704af0b789f1cec6f0d302", + "fb496c9fdf4849cfbe02bb90dd937d0f", + "92da5a1db9e546728ea964327bf3d2c7", + "552e1d9d0eff456c9aed3ff69f462bcd", + "f0505e14608b4d2393d1b34fc3d6b02e", + "86081c4530494ae094faa7a7d62115bc", + "bc67028c4bd74ca58a066f3c4162691e", + "071e857ba588417791fe6041cfbb5eb1", + "ba77cc3dfec7445eb3037f5b557e67f6", + 
"8b8f8538dc414ab389c8d404ce6a6404", + "36f4a4ddba384dd6a98ae8222411553b", + "2b19b22d3f4d4bbc9684fbf9a057fe80", + "0ab6adb6bfa142bc8d4a95e9d9262f9e", + "e41835cba0e64e1c82b7b8d048a60009", + "9aca358d24ad4d0abde5216a466cf4a7", + "b60b4bde2ddc441e9aa065e02ddfc593", + "4464618ffba44244983bcf60b784cc31", + "12710c02945d4b25bfeb736c920fb02d", + "ebeb89455cc5499cb5e3235095c9f556", + "9c0ec9282a2d476989738f76ecd79c27", + "0f406d7294ac4d4bbaf2281aa2596c42", + "34f25a36c81b409a8aa93c64003ac7c6", + "c3f6e6c38b2d42979dba7e4d9dd9b8c4", + "1f693ad3e96d4f18ab9edccec2d951bc", + "89c62ad73874414bb01e9ce151fa4371", + "8994a1ae39674840a98e30f6006305d1", + "183286a1b6d34380a1121af6c4fe2c1b", + "295361afc085400cbbce4b2fcd48edc2", + "c05daba601cb4c4b83f9e0e2eb857875" + ] + }, + "outputId": "9ad0d4ac-e444-44c9-dc28-19205f2fadd0" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading (…)okenizer_config.json: 0%| | 0.00/28.0 [00:00