<a href="https://colab.research.google.com/github/ZHUTING0522/chiikawa/blob/main/Chiikawa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-learn matplotlib mecab-python3

In [None]:
!apt-get install -y mecab libmecab-dev mecab-ipadic-utf8

In [34]:
import pandas as pd
import re
import MeCab
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

In [None]:
# Quick look at the raw posts file before running the pipeline.
import pandas as pd  # redundant: pandas is already imported in the import cell above
# NOTE(review): absolute '/content/...' path — presumably the Colab working directory.
# main() reads 'chiikawa_posts.csv' through a relative path; confirm both refer to the same file.
df = pd.read_csv('/content/chiikawa_posts.csv')
print(df.head())  # show only the first rows to check the columns

In [43]:
# Preprocessing function
_WAKATI_TAGGER = None  # MeCab tagger shared by every preprocess_text call, built on first use


def _get_wakati_tagger():
    """Return the shared MeCab tagger configured for wakati (space-separated) output."""
    global _WAKATI_TAGGER
    if _WAKATI_TAGGER is None:
        # Constructing a Tagger loads the dictionary; do it once instead of once per post.
        _WAKATI_TAGGER = MeCab.Tagger("-Owakati")
    return _WAKATI_TAGGER


def preprocess_text(text):
    """Clean one post and split it into space-separated tokens with MeCab.

    Args:
        text: Raw post text. ``None`` and float NaN (how pandas represents a
            missing cell) are treated as empty text; any other non-string
            value is converted with ``str()``.

    Returns:
        str: The tokens joined by single spaces, or ``""`` when nothing is
        left after cleaning.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = re.sub(r'http\S+|www\S+', '', text)  # Remove URLs
    # Collapse every run of digits and non-word characters (punctuation, symbols,
    # whitespace) into one space. \w is Unicode-aware here, so kana/kanji are kept.
    text = re.sub(r'[\d\W]+', ' ', text)

    # Nothing to tokenize: skip MeCab entirely.
    if not text.strip():
        return ""
    return _get_wakati_tagger().parse(text).strip()

In [44]:
# Visualization function
def visualize_sentiment_distribution(data):
    """Show a pie chart of how often each label occurs in ``data['sentiment']``.

    Args:
        data: Object indexable by ``'sentiment'`` whose result supports
            ``value_counts()`` (a pandas DataFrame in this notebook).
    """
    label_counts = data['sentiment'].value_counts()
    slice_colors = ['#66c2a5', '#fc8d62', '#8da0cb']

    _, ax = plt.subplots(figsize=(8, 6))
    label_counts.plot.pie(
        ax=ax,
        autopct='%1.1f%%',
        startangle=140,
        colors=slice_colors,
    )
    ax.set_title('Sentiment Distribution')
    ax.set_ylabel('')  # hide the axis label pandas adds next to the pie
    plt.show()

In [None]:
# Main function
def main(csv_path='chiikawa_posts.csv'):
    """Train, evaluate and demo a sentiment classifier for the posts CSV.

    Steps: read the CSV, tokenize the ``content`` column with
    ``preprocess_text``, build TF-IDF features, fit a logistic-regression
    model on an 80/20 split, print a classification report for the held-out
    part, plot the label distribution, and print the predicted label for one
    example post.

    Args:
        csv_path: Path of the CSV file to read. It must contain a ``content``
            column (post text) and a ``sentiment`` column (label). Defaults
            to the file name that was previously hard-coded.

    Raises:
        KeyError: If the ``content`` or ``sentiment`` column is missing.
    """
    # Load data
    df = pd.read_csv(csv_path)
    # A row without text or without a label cannot be used for supervised training,
    # and NaN labels make the model fit fail.
    df = df.dropna(subset=['content', 'sentiment'])
    df['cleaned_content'] = df['content'].apply(preprocess_text)

    # Feature extraction
    # The text is already space-separated by MeCab. scikit-learn's default token
    # pattern only keeps tokens of two or more characters, which would silently
    # discard single-character Japanese words; accept one-character tokens too.
    vectorizer = TfidfVectorizer(max_features=5000, token_pattern=r'(?u)\b\w+\b')
    X = vectorizer.fit_transform(df['cleaned_content'])
    y = df['sentiment']

    # Train/test split and model training
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train, y_train)

    # Evaluation
    y_pred = model.predict(X_test)
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Visualization
    visualize_sentiment_distribution(df)

    # Example prediction: the new post must go through the same cleaning and the
    # already-fitted vectorizer so its features line up with the training features.
    new_post = "ちいかわの新しいグッズは素敵すぎる！"
    cleaned_post = preprocess_text(new_post)
    vectorized_post = vectorizer.transform([cleaned_post])
    sentiment = model.predict(vectorized_post)[0]
    print(f"Predicted sentiment for the post: {sentiment}")

# Run the whole pipeline when this cell/script is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
