In [58]:
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, f1_score
from sklearn.ensemble import VotingClassifier
import joblib
from sklearn.cluster import KMeans
from sklearn.decomposition import NMF
from sklearn.preprocessing import FunctionTransformer

In [2]:
df = pd.read_csv("../data/data.csv")
df.drop(columns=["Unnamed: 0"], inplace=True)
df.head()

Unnamed: 0,reviewerName,overall,reviewText,reviewTime,day_diff,helpful_yes,helpful_no,total_vote,score_pos_neg_diff,score_average_rating,wilson_lower_bound
0,,4.0,No issues.,2014-07-23,138,0,0,0,0,0.0,0.0
1,0mie,5.0,"Purchased this for my device, it worked as adv...",2013-10-25,409,0,0,0,0,0.0,0.0
2,1K3,4.0,it works as expected. I should have sprung for...,2012-12-23,715,0,0,0,0,0.0,0.0
3,1m2,5.0,This think has worked out great.Had a diff. br...,2013-11-21,382,0,0,0,0,0.0,0.0
4,2&amp;1/2Men,5.0,"Bought it with Retail Packaging, arrived legit...",2013-07-13,513,0,0,0,0,0.0,0.0


In [3]:
def apply_rating(num):
    if num >= 4:
        return "1"
    elif num <= 2:
        return "-1"
    else:
        return "0"
df["sentiment"] = df["overall"].apply(apply_rating)
df.head()

Unnamed: 0,reviewerName,overall,reviewText,reviewTime,day_diff,helpful_yes,helpful_no,total_vote,score_pos_neg_diff,score_average_rating,wilson_lower_bound,sentiment
0,,4.0,No issues.,2014-07-23,138,0,0,0,0,0.0,0.0,1
1,0mie,5.0,"Purchased this for my device, it worked as adv...",2013-10-25,409,0,0,0,0,0.0,0.0,1
2,1K3,4.0,it works as expected. I should have sprung for...,2012-12-23,715,0,0,0,0,0.0,0.0,1
3,1m2,5.0,This think has worked out great.Had a diff. br...,2013-11-21,382,0,0,0,0,0.0,0.0,1
4,2&amp;1/2Men,5.0,"Bought it with Retail Packaging, arrived legit...",2013-07-13,513,0,0,0,0,0.0,0.0,1


In [4]:
def clean_text(text):
    text = text.replace("\n", " ")
    text = re.sub(r"[^a-z\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

In [5]:
df.isna().sum()

reviewerName            1
overall                 0
reviewText              1
reviewTime              0
day_diff                0
helpful_yes             0
helpful_no              0
total_vote              0
score_pos_neg_diff      0
score_average_rating    0
wilson_lower_bound      0
sentiment               0
dtype: int64

In [6]:
df.dropna(inplace=True)
df.isna().sum()

reviewerName            0
overall                 0
reviewText              0
reviewTime              0
day_diff                0
helpful_yes             0
helpful_no              0
total_vote              0
score_pos_neg_diff      0
score_average_rating    0
wilson_lower_bound      0
sentiment               0
dtype: int64

In [7]:
word_vectorizer = TfidfVectorizer(
    preprocessor=clean_text,
    lowercase=True,
    stop_words="english",
    ngram_range=(1, 2),
    min_df=5,
    max_df=0.9,
    max_features=30000
)

chat_vactorizer = TfidfVectorizer(
    analyzer="char",
    preprocessor=clean_text,
    lowercase=True,
    ngram_range=(2, 4   ),
    min_df=5,
    max_df=0.9,
    max_features=30000
)

vectorizer = FeatureUnion([
    ("word", word_vectorizer),
    ("char", chat_vactorizer)
])
X = df[["reviewText"]]
y = df["sentiment"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42,stratify=y)
X_train_vec = vectorizer.fit_transform(X_train["reviewText"])
X_test_vec = vectorizer.transform(X_test["reviewText"])

In [8]:
X_train_char_vec = chat_vactorizer.fit_transform(X_train["reviewText"])
X_test_char_vec = chat_vactorizer.transform(X_test["reviewText"])

In [9]:
model1 = LogisticRegression(C=1.0, class_weight='balanced', max_iter=1000, random_state=42)
model2 = LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000, random_state=42)
model3 = LogisticRegression(C=10.0, solver='saga', penalty='l1', class_weight='balanced', max_iter=1000, random_state=42)
ensemble = VotingClassifier(
    estimators=[
        ('lr_std', model1),
        ('lr_low_c', model2),
        ('lr_l1', model3)
    ],
    voting='soft'
)

In [10]:
pipe = Pipeline([
    ("vectorizer", vectorizer),
    ("ensemble", ensemble)
])
pipe.fit(X_train["reviewText"], y_train)
y_pred = pipe.predict(X_test["reviewText"])
y_pred_proba = pipe.predict_proba(X_test["reviewText"])

print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(
    "ROC AUC Score:",
    roc_auc_score(
        y_test,
        y_pred_proba,
        multi_class="ovr",
        average="macro"
    )
)
print("F1 Score:", f1_score(y_test, y_pred, average="macro"))



              precision    recall  f1-score   support

          -1       0.56      0.69      0.62        65
           0       0.05      0.04      0.04        28
           1       0.96      0.95      0.95       890

    accuracy                           0.91       983
   macro avg       0.52      0.56      0.54       983
weighted avg       0.90      0.91      0.90       983

[[ 45   3  17]
 [  6   1  21]
 [ 30  15 845]]
ROC AUC Score: 0.8766857358857152
F1 Score: 0.5373927456270146


In [11]:
X1 = pd.DataFrame({"reviewText": ["dont buy"]})
pipe.predict(X1["reviewText"])

array(['-1'], dtype=object)

In [12]:
word1 = TfidfVectorizer(
    preprocessor=clean_text,
    lowercase=True,
    ngram_range=(1,2),
    stop_words="english",
    max_features=20000,
    min_df=5,
    max_df=0.9,
    sublinear_tf=True
)


char1 = TfidfVectorizer(
    preprocessor=clean_text,
    analyzer="char",
    ngram_range=(3,5),
    max_features=20000,
    min_df=5,
    max_df=0.9,
    sublinear_tf=True
)
char2 = TfidfVectorizer(
    preprocessor=clean_text,
    analyzer="char",
    ngram_range=(2,4),
    max_features=15000,
    sublinear_tf=True
)
word2 = TfidfVectorizer(
    preprocessor=clean_text,
    lowercase=True,
    ngram_range=(1,1),
    stop_words="english",
    max_features=15000,
    sublinear_tf=True,
    min_df=5,
    max_df=0.9,
)
pipe1 = Pipeline([
    ("vectorizer", FeatureUnion([("word", word1), ("char", char1)])),
    ("model", LogisticRegression(C=1.0, class_weight='balanced', max_iter=1000, random_state=42))
])
pipe2 = Pipeline([
    ("vectorizer", FeatureUnion([("word", word2), ("char", char1)])),
    ("model", LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000, random_state=42))
])
pipe3 = Pipeline([
    ("vectorizer", char2),
    ("model", LogisticRegression(C=10.0, solver='saga', class_weight='balanced', max_iter=1000, random_state=42))
])
ensemble = VotingClassifier(
    estimators=[
        ('pipe1', pipe1),
        ('pipe2', pipe2),
        ('pipe3', pipe3)
    ],
    voting='soft'
)
ensemble.fit(X_train["reviewText"], y_train)
y_pred = ensemble.predict(X_test["reviewText"])
y_pred_proba = ensemble.predict_proba(X_test["reviewText"])
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(
    "ROC AUC Score:",
    roc_auc_score(
        y_test,
        y_pred_proba,
        multi_class="ovr",
        average="macro"
    )
)
print("F1 Score:", f1_score(y_test, y_pred, average="macro"))



              precision    recall  f1-score   support

          -1       0.64      0.55      0.60        65
           0       0.10      0.36      0.15        28
           1       0.97      0.90      0.93       890

    accuracy                           0.86       983
   macro avg       0.57      0.60      0.56       983
weighted avg       0.93      0.86      0.89       983

[[ 36  19  10]
 [  5  10  13]
 [ 15  75 800]]
ROC AUC Score: 0.88373014109539
F1 Score: 0.56019677751886


In [15]:
joblib.dump(ensemble, "../models/sentimental_model.pkl")

['../models/sentimental_model.pkl']

----

In [60]:
topic_names = {
    0: "SD/Memory Cards",
    1: "Product Functionality / Satisfaction",
    2: "Samsung Devices",
    3: "Micro SD Cards",
    4: "SanDisk Brand / Quality",
    5: "Memory / Storage",
    6: "Price & Quality",
    7: "Card Class / Speed (Class 10, UHS)",
    8: "Tablets / Android Tablets",
    9: "GoPro Cameras",
    10: "Works Fine / No Issues",
    11: "Phones / Smartphones",
    12: "Price / Good Deal",
    13: "Memory Size / GB",
    14: "Product / Recommendations",
    15: "Speed / Reliability",
    16: "Usage / Ease of Use",
    17: "Samsung Note Series",
    18: "Basic Usage / Simple Functions",
    19: "Performance / Does Job",
    20: "Read/Write Speed",
    21: "Media / Music & Videos",
    22: "Delivery / Time",
    23: "Samsung Galaxy S3",
    24: "Samsung Galaxy S4",
    25: "Problems / Issues",
    26: "Misc / Packaging & Labels",
    27: "Cards / Multiple Types",
    28: "Issues / Complaints",
    29: "Microsoft Surface / Tablets",
    30: "Adapters / Accessories",
    31: "Cameras / Video Recording",
    32: "Purchases / Bought Items",
    33: "Worked / Success Stories",
    34: "Feedback / User Opinions",
    35: "Storage Capacity / Space",
    36: "Buying / Recommendations",
    37: "Recommendations / Highly Recommended",
    38: "Likes / User Satisfaction",
    39: "Usage History / Past Experience",
    40: "Samsung Galaxy Tab",
    41: "Perfect / Fits Well",
    42: "Formatting / File System",
    43: "Purchases / Orders",
    44: "File Transfer / Speed",
    45: "Data Handling / Loss",
    46: "Disk / Brand Quality",
    47: "Work / Functionality",
    48: "Storage Space / Extra Room",
    49: "Perfectly / No Problems"
}

In [61]:
n_topics = 50
tfidf = TfidfVectorizer(max_features=5000, min_df=5, max_df=0.95, stop_words="english", ngram_range=(1, 2))
nmf = NMF(n_components=n_topics, random_state=42, max_iter=200)

def map_topics(i):
    topic_ids = i.argmax(axis=1)
    return np.array([topic_names[i] for i in topic_ids]).reshape(-1, 1)

pipeline = Pipeline([
    ("tfidf", tfidf),
    ("nmf", nmf),
    ("mapper", FunctionTransformer(map_topics, validate=False))
])

texts = df["reviewText"].fillna("").astype(str)
texts = texts[texts.str.strip().str.len() > 0]
pipeline.fit(texts)

0,1,2
,"steps  steps: list of tuples List of (name of step, estimator) tuples that are to be chained in sequential order. To be compatible with the scikit-learn API, all steps must define `fit`. All non-last steps must also define `transform`. See :ref:`Combining Estimators ` for more details.","[('tfidf', ...), ('nmf', ...), ...]"
,"transform_input  transform_input: list of str, default=None The names of the :term:`metadata` parameters that should be transformed by the pipeline before passing it to the step consuming it. This enables transforming some input arguments to ``fit`` (other than ``X``) to be transformed by the steps of the pipeline up to the step which requires them. Requirement is defined via :ref:`metadata routing `. For instance, this can be used to pass a validation set through the pipeline. You can only set this if metadata routing is enabled, which you can enable using ``sklearn.set_config(enable_metadata_routing=True)``. .. versionadded:: 1.6",
,"memory  memory: str or object with the joblib.Memory interface, default=None Used to cache the fitted transformers of the pipeline. The last step will never be cached, even if it is a transformer. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming. See :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py` for an example on how to enable caching.",
,"verbose  verbose: bool, default=False If True, the time elapsed while fitting each step will be printed as it is completed.",False

0,1,2
,"input  input: {'filename', 'file', 'content'}, default='content' - If `'filename'`, the sequence passed as an argument to fit is  expected to be a list of filenames that need reading to fetch  the raw content to analyze. - If `'file'`, the sequence items must have a 'read' method (file-like  object) that is called to fetch the bytes in memory. - If `'content'`, the input is expected to be a sequence of items that  can be of type string or byte.",'content'
,"encoding  encoding: str, default='utf-8' If bytes or files are given to analyze, this encoding is used to decode.",'utf-8'
,"decode_error  decode_error: {'strict', 'ignore', 'replace'}, default='strict' Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'.",'strict'
,"strip_accents  strip_accents: {'ascii', 'unicode'} or callable, default=None Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have a direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. None (default) means no character normalization is performed. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`.",
,"lowercase  lowercase: bool, default=True Convert all characters to lowercase before tokenizing.",True
,"preprocessor  preprocessor: callable, default=None Override the preprocessing (string transformation) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer`` is not callable.",
,"tokenizer  tokenizer: callable, default=None Override the string tokenization step while preserving the preprocessing and n-grams generation steps. Only applies if ``analyzer == 'word'``.",
,"analyzer  analyzer: {'word', 'char', 'char_wb'} or callable, default='word' Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21  Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data  is first read from the file and then passed to the given callable  analyzer.",'word'
,"stop_words  stop_words: {'english'}, list, default=None If a string, it is passed to _check_stop_list and the appropriate stop list is returned. 'english' is currently the only supported string value. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. If None, no stop words will be used. In this case, setting `max_df` to a higher value, such as in the range (0.7, 1.0), can automatically detect and filter stop words based on intra corpus document frequency of terms.",'english'
,"token_pattern  token_pattern: str, default=r""(?u)\\b\\w\\w+\\b"" Regular expression denoting what constitutes a ""token"", only used if ``analyzer == 'word'``. The default regexp selects tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. At most one capturing group is permitted.",'(?u)\\b\\w\\w+\\b'

0,1,2
,"n_components  n_components: int or {'auto'} or None, default='auto' Number of components. If `None`, all features are kept. If `n_components='auto'`, the number of components is automatically inferred from W or H shapes. .. versionchanged:: 1.4  Added `'auto'` value. .. versionchanged:: 1.6  Default value changed from `None` to `'auto'`.",50
,"init  init: {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None Method used to initialize the procedure. Valid options: - `None`: 'nndsvda' if n_components <= min(n_samples, n_features),  otherwise random. - `'random'`: non-negative random matrices, scaled with:  `sqrt(X.mean() / n_components)` - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)  initialization (better for sparseness) - `'nndsvda'`: NNDSVD with zeros filled with the average of X  (better when sparsity is not desired) - `'nndsvdar'` NNDSVD with zeros filled with small random values  (generally faster, less accurate alternative to NNDSVDa  for when sparsity is not desired) - `'custom'`: Use custom matrices `W` and `H` which must both be provided. .. versionchanged:: 1.1  When `init=None` and n_components is less than n_samples and n_features  defaults to `nndsvda` instead of `nndsvd`.",
,"solver  solver: {'cd', 'mu'}, default='cd' Numerical solver to use: - 'cd' is a Coordinate Descent solver. - 'mu' is a Multiplicative Update solver. .. versionadded:: 0.17  Coordinate Descent solver. .. versionadded:: 0.19  Multiplicative Update solver.",'cd'
,"beta_loss  beta_loss: float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius' Beta divergence to be minimized, measuring the distance between X and the dot product WH. Note that values different from 'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input matrix X cannot contain zeros. Used only in 'mu' solver. .. versionadded:: 0.19",'frobenius'
,"tol  tol: float, default=1e-4 Tolerance of the stopping condition.",0.0001
,"max_iter  max_iter: int, default=200 Maximum number of iterations before timing out.",200
,"random_state  random_state: int, RandomState instance or None, default=None Used for initialisation (when ``init`` == 'nndsvdar' or 'random'), and in Coordinate Descent. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `.",42
,"alpha_W  alpha_W: float, default=0.0 Constant that multiplies the regularization terms of `W`. Set it to zero (default) to have no regularization on `W`. .. versionadded:: 1.0",0.0
,"alpha_H  alpha_H: float or ""same"", default=""same"" Constant that multiplies the regularization terms of `H`. Set it to zero to have no regularization on `H`. If ""same"" (default), it takes the same value as `alpha_W`. .. versionadded:: 1.0",'same'
,"l1_ratio  l1_ratio: float, default=0.0 The regularization mixing parameter, with 0 <= l1_ratio <= 1. For l1_ratio = 0 the penalty is an elementwise L2 penalty (aka Frobenius Norm). For l1_ratio = 1 it is an elementwise L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2. .. versionadded:: 0.17  Regularization parameter *l1_ratio* used in the Coordinate Descent  solver.",0.0

0,1,2
,"func  func: callable, default=None The callable to use for the transformation. This will be passed the same arguments as transform, with args and kwargs forwarded. If func is None, then func will be the identity function.",<function map...t 0x11fc02980>
,"inverse_func  inverse_func: callable, default=None The callable to use for the inverse transformation. This will be passed the same arguments as inverse transform, with args and kwargs forwarded. If inverse_func is None, then inverse_func will be the identity function.",
,"validate  validate: bool, default=False Indicate that the input X array should be checked before calling ``func``. The possibilities are: - If False, there is no input validation. - If True, then X will be converted to a 2-dimensional NumPy array or  sparse matrix. If the conversion is not possible an exception is  raised. .. versionchanged:: 0.22  The default of ``validate`` changed from True to False.",False
,"accept_sparse  accept_sparse: bool, default=False Indicate that func accepts a sparse matrix as input. If validate is False, this has no effect. Otherwise, if accept_sparse is false, sparse matrix inputs will cause an exception to be raised.",False
,"check_inverse  check_inverse: bool, default=True Whether to check that or ``func`` followed by ``inverse_func`` leads to the original inputs. It can be used for a sanity check, raising a warning when the condition is not fulfilled. .. versionadded:: 0.20",True
,"feature_names_out  feature_names_out: callable, 'one-to-one' or None, default=None Determines the list of feature names that will be returned by the `get_feature_names_out` method. If it is 'one-to-one', then the output feature names will be equal to the input feature names. If it is a callable, then it must take two positional arguments: this `FunctionTransformer` (`self`) and an array-like of input feature names (`input_features`). It must return an array-like of output feature names. The `get_feature_names_out` method is only defined if `feature_names_out` is not None. See ``get_feature_names_out`` for more details. .. versionadded:: 1.1",
,"kw_args  kw_args: dict, default=None Dictionary of additional keyword arguments to pass to func. .. versionadded:: 0.18",
,"inv_kw_args  inv_kw_args: dict, default=None Dictionary of additional keyword arguments to pass to inverse_func. .. versionadded:: 0.18",


In [None]:
joblib.dump(pipeline, "../models/topic_model.pkl")

---

In [None]:
texts_for_tfidf = df["reviewText"].fillna("").astype(str)
texts_for_tfidf = texts_for_tfidf[texts_for_tfidf.str.strip().str.len() > 0]

summarizer_tfidf = TfidfVectorizer(stop_words="english", max_features=10000)
summarizer_tfidf.fit(texts_for_tfidf)
feature_names = summarizer_tfidf.get_feature_names_out()
idf = summarizer_tfidf.idf_
word2idf = dict(zip(feature_names, idf))

def _split_sentences(text):
    parts = re.split(r"(?<=[.!?])\s+", text)
    return [s.strip() for s in parts if s.strip()]

def _tokenize_words(sent):
    return re.findall(r"\b\w+\b", sent.lower())

def summarize_review(text, word2idf=word2idf, top_k=1):
    text = str(text).strip()
    if not text:
        return ""
    sentences = _split_sentences(text)
    if len(sentences) <= top_k:
        return text
    scores = []
    for sent in sentences:
        words = _tokenize_words(sent)
        weights = [word2idf.get(w, 0) for w in words if w in word2idf]
        score = np.mean(weights) if weights else 0
        scores.append(score)
    top_indices = np.argsort(scores)[-top_k:][::-1]
    return " ".join(sentences[i] for i in sorted(top_indices))


df["summary"] = df["reviewText"].apply(lambda x: summarize_review(x, word2idf=word2idf, top_k=1))


summarizer_model = {"tfidf": summarizer_tfidf, "word2idf": word2idf}
joblib.dump(summarizer_model, "../models/summarizer_model.pkl")

Модель саммаризации сохранена в models/summarizer_model.pkl


In [None]:
loaded = joblib.load("../models/summarizer_model.pkl")
summary_new = summarize_review("Great product, fast delivery. Battery life is amazing. Would buy again.", word2idf=loaded["word2idf"], top_k=2)
print("Новый отзыв → саммари:", summary_new)

Новый отзыв → саммари: Great product, fast delivery. Battery life is amazing.


In [72]:
# Примеры саммаризации (отзыв → краткое саммари)
for idx in df.index[:6]:
    full = df.loc[idx, "reviewText"]
    short = df.loc[idx, "summary"]
    print(f"Отзыв: {str(full)[:120]}{'...' if len(str(full)) > 120 else ''}")
    print(f"Саммари: {short}")
    print("-" * 60)

Отзыв: Purchased this for my device, it worked as advertised. You can never have too much phone memory, since I download a lot ...
Саммари: You can never have too much phone memory, since I download a lot of stuff this was a no brainer for me.
------------------------------------------------------------
Отзыв: it works as expected. I should have sprung for the higher capacity.  I think its made a bit cheesier than the earlier ve...
Саммари: I think its made a bit cheesier than the earlier versions; the paint looks not as clean as before
------------------------------------------------------------
Отзыв: This think has worked out great.Had a diff. bran 64gb card and if went south after 3 months.This one has held up pretty ...
Саммари: bran 64gb card and if went south after 3 months.This one has held up pretty well since I had my S3, now on my Note3.*** update 3/21/14I've had this for a few months and have had ZERO issue's since it was transferred from my S3 to my Note3 and into a note2.