In [1]:
import pandas as pd
import re
from rapidfuzz import fuzz
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


In [2]:
data = pd.read_csv('/Users/mac2/University/Machine Learning/VS Code /Test_repo/ROADWATCH/Road/messages_cleanedNEW.csv')
data.head()


Unnamed: 0,id,date,sender_id,message
0,1705274,2024-11-01T12:53:19+00:00,6415259000.0,شرطه نازله من عيلي
1,1705273,2024-11-01T12:52:29+00:00,6415259000.0,حادث سير بعد اشارات شيلو باتجاه عيلي
2,1705272,2024-11-01T12:48:24+00:00,6415259000.0,المنشية تواجد جيش مع شرطة ومخالفات
3,1705271,2024-11-01T12:44:55+00:00,6415259000.0,الرجاء من كل شخص بقرا التقرير يكبس ❤️ أو 👍\n\n...
4,1705270,2024-11-01T12:39:22+00:00,6415259000.0,تم فتح حاجز عناب الان بالاتجاهين ✅✅✅✅


In [3]:
def remove_emojis(text):
    if isinstance(text, str):
        return re.sub(r'[^\w\s,.!?؛:\n]', '', text)
    return text

data['message'] = data['message'].apply(remove_emojis)
data.head()


Unnamed: 0,id,date,sender_id,message
0,1705274,2024-11-01T12:53:19+00:00,6415259000.0,شرطه نازله من عيلي
1,1705273,2024-11-01T12:52:29+00:00,6415259000.0,حادث سير بعد اشارات شيلو باتجاه عيلي
2,1705272,2024-11-01T12:48:24+00:00,6415259000.0,المنشية تواجد جيش مع شرطة ومخالفات
3,1705271,2024-11-01T12:44:55+00:00,6415259000.0,الرجاء من كل شخص بقرا التقرير يكبس أو \n\nتفا...
4,1705270,2024-11-01T12:39:22+00:00,6415259000.0,تم فتح حاجز عناب الان بالاتجاهين


In [4]:
def remove_jargon(data, jargon_terms):
    def clean_message(message):
        pattern = re.compile(r'\b(' + '|'.join(re.escape(term) for term in jargon_terms) + r')\b', re.IGNORECASE)
        return pattern.sub('', message)

    data['message'] = data['message'].apply(clean_message)
    data['message'] = data['message'].str.strip().replace(r'\s+', ' ', regex=True)
    return data

In [5]:
def regex_tokenize(data):
    sentence_splitter = re.compile(r'(?<=[.!?]) +')
    word_splitter = re.compile(r'[\u0600-\u06FF]+|\d+|\w+')

    sentences_list = []
    words_list = []
    original_messages = []
    word_indices = []
    sentence_count = 1

    for index, message in data['message'].items():
        if isinstance(message, str):
            sentences = sentence_splitter.split(message.strip())
            for sentence in sentences:
                words = word_splitter.findall(sentence)
                for word_index, word in enumerate(words):
                    sentences_list.append(f"Sentence: {sentence_count} (Message {index})")
                    words_list.append(word)
                    original_messages.append(message)
                    word_indices.append(word_index + 1)
                sentence_count += 1
        
    tokenized_df = pd.DataFrame({
        'Sentence #': sentences_list,
        'Word': words_list,
        'Original Message': original_messages,
        'Word Index': word_indices
    })

    return tokenized_df

tokenized_df = regex_tokenize(data)
tokenized_df[['Sentence', 'Message']] = tokenized_df['Sentence #'].str.extract(r'Sentence: (\d+) \(Message (\d+)\)')
tokenized_df.drop(columns=['Sentence #'], inplace=True)
tokenized_df


Unnamed: 0,Word,Original Message,Word Index,Sentence,Message
0,شرطه,شرطه نازله من عيلي,1,1,0
1,نازله,شرطه نازله من عيلي,2,1,0
2,من,شرطه نازله من عيلي,3,1,0
3,عيلي,شرطه نازله من عيلي,4,1,0
4,حادث,حادث سير بعد اشارات شيلو باتجاه عيلي,1,2,1
...,...,...,...,...,...
303763,وتفتيش,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,6,32976,33988
303764,سياره,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,7,32976,33988
303765,سياره,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,8,32976,33988
303766,عورتا,عورتا لداخل,1,32977,33990


In [6]:
from rapidfuzz import fuzz

location_names = [
    "المنشية", "يتسهار", "صرة", "زعترة", "عصيرة", "المساكن", "المربعة", 
    "بوابة بورين", "دير شرف", "عورتا", "بيت فوريك", "بزاريا", "شافي شمرون",
    "حومش", "عناب", "الكفريات", "حجة", "وادي قانا", "بيت ليد", "كفر لاقف", 
    "الحمرا", "عوفرا", "ارئيل", "سلفيت", "رام الله", "نابلس", "عين سينيا", 
    "عطارة", "روابي", "عيون الحرامية", "ترمسعيا", "سنجل", "كركر", "بيت ايل", 
    "عزون", "جماعين", "كفل حارس", "مردا", "اللبن الشرقية", "بديا", "برقين", 
    "الزعيم", "العيزرية", "قلنديا", "جبع", "الكونتينر", "عش الغراب", 
    "مخماس", "جيت", "بيتا", "دير بلوط", "ياسوف", "النبي الياس", "سعير", 
    "حلحول", "حوارة", "راس الجورة", "عناتا", "حزما", "اماتين", "بيرزيت", 
    "النبي صالح", "افرات", "تقوع", "دورا", "عتصيون", "العروب", "فرش الهوى", 
    "حاجز النفق", "شارع بيجن", "الخليل", "واد النار", "DCO", "بيت جالا", 
    "اريحا", "برقة", "الغرس", "يتما", "الساوية", "يبرود", "سلواد", 
    "الطيبة", "النشاش", "طولكرم", "المسعودية", "بيت ايل", "جنين", 
    "قلقيلية", "عيلي", "كدوميم", "مخيم الجلزون", "روجيب", "شعفاط", 
    "الرام", "بيت لحم", "حوارة", "قبلان", "عينبوس", "بيت عور", "القرع", 
    "اودلا", "معالي ادوميم", "حزما", "السيلة", "نور شمس", "بلعا", 
    "الجفتلك", "الباذان", "البيرة", "بركان", "حبلة", "حرميش", "طوباس", 
    "السيلة", "افرايم", "ضواحي القدس", "القدس", "العبيدية", "بيت ساحور", 
    "تل الربيع", "يطا", "الظاهرية", "ترقوميا", "دير شرف", "شقبا", 
    "النفق", "شيلو", "عطروت", "حلميش", "المشتل", "عقربا", "قلنديا", 
    "سبسطية", "الفوار", "ادوميم", "العبيدية", "جين صافوت", "زواتا", 
    "حومش", "دير استيا", "العوجا", "بيت امر", "قباطية", "الجلزون", 
    "عطارة", "الخروبة", "عارورة", "الاحراش", "عصيرة الشمالية", "جيوس", "عين يبرود", 
    "عنبتا", "الناقورة", "الدهيشة", "بيت امرين", "قوصين", "عناتا", 
    "باب الزاوية", "الصيرفي", "صوريف", "ضاحية الريحان", "ابو ديس", "سالم", 
    "العيسوية", "بيت حنينا", "البيرة", "الاسكانات", "الزاوية", "النصارية",
    "واد قانا", "المصانع", "كفر عقب", "بير زيت", "عصيرة القبلية", "الفندق"
]

close_words = [
    'مغلق', 'مغلقة', 'وقوف', 'واقفه', 'سكروه', 'مسكره', 'سكر', 
    'مسكرين', 'متوقفه', 'اغلاق', 'اغلاقه'
]
open_words = [
    'فتح', 'فاضي', 'سالك', 'سالكه', 'فاتحة', 'فاتح', 'سلك', 'فتحت',
    'فاتحين', 'بمشو', 'مفتوحة', 'بسلك', 'بمشي', 'مشوا', 'فتحوها', 'سالكات', 'وسلك',
    'بالاتجاهين', 'بكل الاتجاهات', 'للداخل والطالع', 'للجهتين'
]
B_inside = ['للداخل', 'للفايت', 'فايت', 'داخل']
B_outside = ['للخارج', 'للطالع', 'طالع']

def regex_tokenize_with_similarity(data, location_names, similarity_threshold=80, close_threshold=80, open_threshold=80):
    sentence_splitter = re.compile(r'(?<=[.!?]) +')
    word_splitter = re.compile(r'[\u0600-\u06FF]+|\d+|\w+')

    sentences_list = []
    words_list = []
    original_messages = []
    word_indices = []
    labels = []
    similarities = []
    sentence_count = 1

    for index, message in data['message'].items():
        if isinstance(message, str):
            sentences = sentence_splitter.split(message.strip())
            for sentence in sentences:
                words = word_splitter.findall(sentence)
                
                i = 0
                while i < len(words):
                    if i + 1 < len(words) and f"{words[i]} {words[i+1]}" in location_names:
                        labels.extend(["B-LOC", "I-LOC"])
                        similarities.extend([100, 100])  
                        
                        sentences_list.extend([f"Sentence: {sentence_count} (Message {index})"] * 2)
                        words_list.extend([words[i], words[i + 1]])
                        original_messages.extend([message] * 2)
                        word_indices.extend([i + 1, i + 2])
                        
                        i += 2
                        continue
                    elif words[i] in location_names:
                        labels.append("B-LOC")
                        similarities.append(100)
                    elif words[i] in B_inside:
                        labels.append("B-inside")
                        similarities.append(100)
                    elif words[i] in B_outside:
                        labels.append("B-outside")
                        similarities.append(100)
                    else:
                        max_close_similarity = max(fuzz.ratio(words[i], word) for word in close_words)
                        if max_close_similarity >= close_threshold:
                            labels.append("B-close")
                            similarities.append(max_close_similarity)
                        else:
                            max_open_similarity = max(fuzz.ratio(words[i], word) for word in open_words)
                            if max_open_similarity >= open_threshold:
                                labels.append("B-open")
                                similarities.append(max_open_similarity)
                            else:
                                max_loc_similarity = max(fuzz.ratio(words[i], loc) for loc in location_names)
                                labels.append("B-LOC" if max_loc_similarity >= similarity_threshold else "O")
                                similarities.append(max_loc_similarity if max_loc_similarity >= similarity_threshold else 0)

                    sentences_list.append(f"Sentence: {sentence_count} (Message {index})")
                    words_list.append(words[i])
                    original_messages.append(message)
                    word_indices.append(i + 1)
                    i += 1
                    
                sentence_count += 1

    tokenized_df = pd.DataFrame({
        'Sentence #': sentences_list,
        'Word': words_list,
        'Original Message': original_messages,
        'Word Index': word_indices,
        'label': labels,
        'Similarity Score': similarities
    })

    return tokenized_df

# Assume you have `location_names`, `B_inside`, `B_outside`, `close_words`, and `open_words` lists
tokenized_df = regex_tokenize_with_similarity(data, location_names)
tokenized_df[['Sentence', 'Message']] = tokenized_df['Sentence #'].str.extract(r'Sentence: (\d+) \(Message (\d+)\)')
tokenized_df.drop(columns=['Sentence #'], inplace=True)
tokenized_df


Unnamed: 0,Word,Original Message,Word Index,label,Similarity Score,Sentence,Message
0,شرطه,شرطه نازله من عيلي,1,O,0.0,1,0
1,نازله,شرطه نازله من عيلي,2,O,0.0,1,0
2,من,شرطه نازله من عيلي,3,O,0.0,1,0
3,عيلي,شرطه نازله من عيلي,4,B-LOC,100.0,1,0
4,حادث,حادث سير بعد اشارات شيلو باتجاه عيلي,1,O,0.0,2,1
...,...,...,...,...,...,...,...
303763,وتفتيش,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,6,O,0.0,32976,33988
303764,سياره,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,7,O,0.0,32976,33988
303765,سياره,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,8,O,0.0,32976,33988
303766,عورتا,عورتا لداخل,1,B-LOC,100.0,32977,33990


In [7]:
from sklearn.preprocessing import OrdinalEncoder

# Initialize OrdinalEncoder
word_encoder = OrdinalEncoder()
label_encoder = OrdinalEncoder()

# Fit and transform the 'Word' column
tokenized_df['Word_id'] = word_encoder.fit_transform(tokenized_df[['Word']])

# Fit and transform the 'label' column
tokenized_df['label_id'] = label_encoder.fit_transform(tokenized_df[['label']])

# Convert the encoded columns to integers for better readability
tokenized_df['Word_id'] = tokenized_df['Word_id'].astype(int)
tokenized_df['label_id'] = tokenized_df['label_id'].astype(int)

# Display the resulting DataFrame
tokenized_df


Unnamed: 0,Word,Original Message,Word Index,label,Similarity Score,Sentence,Message,Word_id,label_id
0,شرطه,شرطه نازله من عيلي,1,O,0.0,1,0,6646,6
1,نازله,شرطه نازله من عيلي,2,O,0.0,1,0,10055,6
2,من,شرطه نازله من عيلي,3,O,0.0,1,0,9903,6
3,عيلي,شرطه نازله من عيلي,4,B-LOC,100.0,1,0,7574,0
4,حادث,حادث سير بعد اشارات شيلو باتجاه عيلي,1,O,0.0,2,1,5455,6
...,...,...,...,...,...,...,...,...,...
303763,وتفتيش,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,6,O,0.0,32976,33988,11390,6
303764,سياره,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,7,O,0.0,32976,33988,6532,6
303765,سياره,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,8,O,0.0,32976,33988,6532,6
303766,عورتا,عورتا لداخل,1,B-LOC,100.0,32977,33990,7554,0


In [8]:
tokenized_df['label'].value_counts()

label
O            170249
B-LOC         63211
B-open        41262
I-LOC         13916
B-close        9043
B-outside      3686
B-inside       2401
Name: count, dtype: int64

In [9]:
tokenized_df

Unnamed: 0,Word,Original Message,Word Index,label,Similarity Score,Sentence,Message,Word_id,label_id
0,شرطه,شرطه نازله من عيلي,1,O,0.0,1,0,6646,6
1,نازله,شرطه نازله من عيلي,2,O,0.0,1,0,10055,6
2,من,شرطه نازله من عيلي,3,O,0.0,1,0,9903,6
3,عيلي,شرطه نازله من عيلي,4,B-LOC,100.0,1,0,7574,0
4,حادث,حادث سير بعد اشارات شيلو باتجاه عيلي,1,O,0.0,2,1,5455,6
...,...,...,...,...,...,...,...,...,...
303763,وتفتيش,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,6,O,0.0,32976,33988,11390,6
303764,سياره,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,7,O,0.0,32976,33988,6532,6
303765,سياره,الكونتير بدايه ازمه باتجاه الجنوب وتفتيش سياره...,8,O,0.0,32976,33988,6532,6
303766,عورتا,عورتا لداخل,1,B-LOC,100.0,32977,33990,7554,0


In [10]:
import wandb
wandb.init(project="ROADWATCH", name="CatBoostn_run")


[34m[1mwandb[0m: Currently logged in as: [33mehabsulima23[0m ([33mehabsulima23-an-najah-national-university[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [11]:
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import wandb


# Assuming tokenized_df has 'Word' and 'label' columns
X = tokenized_df[['Word']]  # Features (Word column)
y = tokenized_df['label']   # Target (label column)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)

# Initialize sample weights (modify if class imbalance exists)
sample_weights = np.ones(len(y_train), dtype=float)

# Initialize the CatBoost model
model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    cat_features=[0],  # Specify 'Word' as categorical
    loss_function='MultiClass',
    custom_metric=['Accuracy', 'MultiClass']  # MultiClass metric replaces Logloss
)

# Fit the model
model.fit(
    X_train, y_train,
    eval_set=(X_test, y_test),
    verbose=100,  # Log progress every 100 iterations
    sample_weight=sample_weights
)

# Predict probabilities and labels
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)

# Retrieve unique class names directly from `y`
class_names = np.unique(y)

# Calculate ROC curve and AUC for each class
n_classes = len(class_names)
fpr, tpr, roc_auc = {}, {}, {}

for i, class_name in enumerate(class_names):
    fpr[i], tpr[i], _ = roc_curve((y_test == class_name).astype(int), y_proba[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Log classification report to W&B
class_report = classification_report(
    y_test, y_pred, target_names=class_names, output_dict=True
)
wandb.log({"classification_report": class_report})

# Log confusion matrix to W&B
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_names)
wandb.sklearn.plot_confusion_matrix(y_test, y_pred, labels=class_names)

# Log ROC curves to W&B
plt.figure(figsize=(10, 8))
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC = {roc_auc[i]:.2f})')

plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.grid()
wandb.log({"roc_curve": plt})
plt.close()  # Close the figure after logging

# Log training and test accuracy
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
wandb.log({"train_accuracy": train_accuracy, "test_accuracy": test_accuracy})

# Extract evaluation results
evals_result = model.get_evals_result()
train_losses = evals_result['learn']['Logloss']
eval_losses = evals_result['validation']['Logloss']

# Plot and log loss curves to W&B
plt.figure(figsize=(10, 6))
plt.plot(train_losses, label='Training Logloss')
plt.plot(eval_losses, label='Validation Logloss')
plt.xlabel('Iterations')
plt.ylabel('Logloss')
plt.title('Training and Validation Logloss')
plt.legend()
plt.grid()
wandb.log({"loss_curve": plt})
plt.close()  # Close the figure after logging

# Log accuracy curves if available
if 'Accuracy' in evals_result['learn']:
    train_acc_curve = evals_result['learn']['Accuracy']
    val_acc_curve = evals_result['validation']['Accuracy']

    plt.figure(figsize=(10, 6))
    plt.plot(train_acc_curve, label='Training Accuracy')
    plt.plot(val_acc_curve, label='Validation Accuracy')
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()
    plt.grid()
    wandb.log({"accuracy_curve": plt})
    plt.close()  # Close the figure after logging

# Finish the W&B run
wandb.finish()


0:	learn: 1.4062474	test: 1.4029169	best: 1.4029169 (0)	total: 350ms	remaining: 5m 49s
100:	learn: 0.0600476	test: 0.0492843	best: 0.0492843 (100)	total: 17.1s	remaining: 2m 31s
200:	learn: 0.0593382	test: 0.0487041	best: 0.0487041 (200)	total: 32.2s	remaining: 2m 7s
300:	learn: 0.0591527	test: 0.0486334	best: 0.0486317 (291)	total: 47.3s	remaining: 1m 49s
400:	learn: 0.0590463	test: 0.0486147	best: 0.0486137 (370)	total: 1m 2s	remaining: 1m 33s
500:	learn: 0.0589935	test: 0.0486127	best: 0.0486091 (449)	total: 1m 17s	remaining: 1m 17s
600:	learn: 0.0589478	test: 0.0486017	best: 0.0486017 (600)	total: 1m 32s	remaining: 1m 1s
700:	learn: 0.0589189	test: 0.0486044	best: 0.0486017 (600)	total: 1m 47s	remaining: 45.9s
800:	learn: 0.0588939	test: 0.0485997	best: 0.0485993 (797)	total: 2m 2s	remaining: 30.5s
900:	learn: 0.0588810	test: 0.0486028	best: 0.0485993 (797)	total: 2m 17s	remaining: 15.1s
999:	learn: 0.0588629	test: 0.0485998	best: 0.0485993 (797)	total: 2m 29s	remaining: 0us

bestT


I found a path object that I don't think is part of a bar chart. Ignoring.



KeyError: 'Logloss'

In [76]:
import wandb
wandb.init(project="ROADWATCH", name="LOG_REG_TargetEncoding_run")


In [37]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Target Encoding
target_encoding = tokenized_df.groupby('Word')['label_id'].mean()
tokenized_df['Word_id_encoded'] = tokenized_df['Word'].map(target_encoding)

# Split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    tokenized_df['Word_id_encoded'].values.reshape(-1, 1),  # Reshape for compatibility
    tokenized_df['label_id'],
    test_size=0.2,
    random_state=42
)

# Initialize Logistic Regression
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# Predict probabilities and labels
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)

# Decode class names
class_names = label_encoder.inverse_transform(np.arange(len(label_encoder.categories_[0])).reshape(-1, 1)).flatten()

# Calculate ROC curve and AUC for each class
n_classes = len(np.unique(y_train))
fpr = {}
tpr = {}
roc_auc = {}

for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve((y_test == i).astype(int), y_proba[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Log classification report
class_report = classification_report(
    y_test, y_pred, target_names=class_names, output_dict=True
)
wandb.log({"classification_report": class_report})

# Log confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
wandb.sklearn.plot_confusion_matrix(y_test, y_pred, labels=class_names)

# Log ROC curves
plt.figure(figsize=(10, 8))
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC = {roc_auc[i]:.2f})')

plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.grid()

# Log ROC curve to WandB
wandb.log({"roc_curve": plt})

# Log training accuracy
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
wandb.log({"train_accuracy": train_accuracy, "test_accuracy": test_accuracy})

# Log precision and recall per class
for cls, metrics in class_report.items():
    if cls in class_names:  # Ignore overall averages (precision/recall/F1-score)
        wandb.log({
            f"{cls}_precision": metrics["precision"],
            f"{cls}_recall": metrics["recall"],
            f"{cls}_f1-score": metrics["f1-score"]
        })

wandb.finish()


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


I found a path object that I don't think is part of a bar chart. Ignoring.



0,1
B-LOC_f1-score,▁
B-LOC_precision,▁
B-LOC_recall,▁
B-close_f1-score,▁
B-close_precision,▁
B-close_recall,▁
B-inside_f1-score,▁
B-inside_precision,▁
B-inside_recall,▁
B-open_f1-score,▁

0,1
B-LOC_f1-score,0.90339
B-LOC_precision,0.8596
B-LOC_recall,0.95188
B-close_f1-score,0.0
B-close_precision,0.0
B-close_recall,0.0
B-inside_f1-score,0.0
B-inside_precision,0.0
B-inside_recall,0.0
B-open_f1-score,0.88119
