In [155]:
import pandas as pd
import numpy as np
import gensim
from gensim.models import Word2Vec
import nltk
import string
import os
import collections
import smart_open
import random
import datetime
import json
import heapq
import sys

# Understand the table

In [2]:

post_df = pd.read_csv('all_posts.csv', sep = "\t")
post_df.head()

Unnamed: 0,Id,PostTypeId,ParentId,AcceptedAnswerId,CreationDate,Score,ViewCount,Body,OwnerUserId,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount
0,1,1,,15.0,2010-07-19T19:12:12.510,36,2577.0,How should I elicit prior distributions from e...,8.0,2010-09-15T21:08:26.077,Eliciting priors from experts,<bayesian><prior><elicitation>,5.0,1,23.0
1,2,1,,59.0,2010-07-19T19:12:57.157,29,23368.0,In many different statistical methods there is...,24.0,2016-06-27T06:44:40.147,What is normality?,<distributions><normality>,7.0,1,10.0
2,3,1,,5.0,2010-07-19T19:13:28.577,66,5792.0,What are some valuable Statistical Analysis op...,18.0,2013-05-27T14:48:36.927,What are some valuable Statistical Analysis op...,<software><open-source>,19.0,4,39.0
3,4,1,,135.0,2010-07-19T19:13:31.617,17,26414.0,I have two groups of data. Each with a differ...,23.0,2010-09-08T03:00:19.690,Assessing the significance of differences in d...,<distributions><statistical-significance>,5.0,2,5.0
4,5,2,3.0,,2010-07-19T19:14:43.050,87,,The R-project\n\nhttp://www.r-project.org/\n\n...,23.0,2010-07-19T19:21:15.063,,,,3,


In [3]:
post_df.tail()

Unnamed: 0,Id,PostTypeId,ParentId,AcceptedAnswerId,CreationDate,Score,ViewCount,Body,OwnerUserId,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount
215957,299976,2,299970.0,,2017-08-27T01:55:50.117,0,,\n We cannot simulate separate values (becaus...,8336.0,2017-08-27T01:55:50.117,,,,0,
215958,299977,2,299963.0,,2017-08-27T02:07:55.553,0,,"Neural networks could ""learn"" where the labels...",30621.0,2017-08-27T02:07:55.553,,,,0,
215959,299978,2,299669.0,,2017-08-27T03:26:04.497,0,,"Calculating $\displaystyle\hat{t}_i=\int t\, p...",8336.0,2017-08-27T03:26:04.497,,,,0,
215960,299979,1,,,2017-08-27T03:52:26.230,0,2.0,I have collected primary data of BISP with hel...,175092.0,2017-08-27T03:52:26.230,What should be the solution of insignificant (...,<statistical-significance>,0.0,0,
215961,299980,2,299446.0,,2017-08-27T04:05:22.103,0,,While it's possible to combine word embeddings...,107579.0,2017-08-27T04:05:22.103,,,,0,


### How many posts?

In [4]:
n = post_df.shape[0]
n

215962

In [5]:
len(post_df['Id'].unique()) == post_df.shape[0] 

True

### How many questions?

In [6]:
post_df['PostTypeId'].value_counts()

1    108954
2    104797
5      1091
4      1091
6        18
3         6
7         5
Name: PostTypeId, dtype: int64

In [7]:
Type1 = post_df.loc[post_df['PostTypeId'] == 1]
Type1.shape

(108954, 15)

### Contents of questions

In [8]:
len(Type1['Body'].unique()) 

108937

In [9]:
sum(Type1['Body'].isnull())

0


So the unique values of "Body" is less than the number of questions. This is because some asked their
questions more than once and maybe the titles are different, but the contents are exactly the same.


### Confirm answers to questions.

I found the answers to question_id = 1 online and get the rows from the table to check that their parentId is indeed 1. 
But only on answer is accepted and is called accepted_answer. 

In [10]:
answer = 'John Cook gives some interesting recommendations.' # answer to question id = 1
post_df.loc[post_df['Body'].apply(lambda x: answer in str(x))]

Unnamed: 0,Id,PostTypeId,ParentId,AcceptedAnswerId,CreationDate,Score,ViewCount,Body,OwnerUserId,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount
14,15,2,1.0,,2010-07-19T19:19:46.160,17,,John Cook gives some interesting recommendatio...,6.0,2010-07-19T19:19:46.160,,,,0,


In [11]:
answer = 'Experts are given counters (or what one can think of as casino chips) representing equal densities whose total would sum up' # answer to question id = 1
post_df.loc[post_df['Body'].apply(lambda x: answer in str(x))]

Unnamed: 0,Id,PostTypeId,ParentId,AcceptedAnswerId,CreationDate,Score,ViewCount,Body,OwnerUserId,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount
142,154,2,1.0,,2010-07-19T22:40:47.947,25,,I am currently researching the trial roulette ...,108.0,2010-09-03T17:46:44.017,,,,2,


### What are PostTypeIds?

Only post with postTypeId = 1 have titles. Because only questions have titles and answers don't have title.  Or if PostTypeId != 1, then title is missing.

In [12]:
sum(post_df["Title"].isnull()) + 108954 == post_df.shape[0] # 

True

A guess: if PostTypeId is not 1 and 2, then they are answers and they are not accepted answers?

In [13]:

acceptedAnswerId = post_df['AcceptedAnswerId'].unique()

In [14]:
#34167 answers are accepted.
len(acceptedAnswerId)
not_accepted = post_df.loc[post_df['Id'].apply(lambda x: x not in acceptedAnswerId)]

In [15]:
not_accepted['PostTypeId'].unique()

array([1, 2, 7, 5, 4, 6, 3])

So PostTypeId is not related to whether an answer is accepted or not.

** What is it?**

In [16]:
post_df.loc[post_df['PostTypeId'] == 4]['Body'][0:10]

2182    Use this tag for any *on-topic* question that ...
2264    Mixed (aka multilevel or hierarchical) models ...
2419    Psychometrics has evolved as a subfield of psy...
2778    Model selection is a problem of judging which ...
2780    Cluster analysis is the task of partitioning d...
2782    Time series are data observed over time (eithe...
2784    Hypothesis testing assesses whether data suppo...
2935    Prediction of the future events. It is a speci...
3714    Stata is a proprietary  cross-platform general...
5647    IBM SPSS Statistics (formerly SPSS, i.e. "Stat...
Name: Body, dtype: object

### Question posts: 

In [17]:
Q_df = post_df.loc[post_df['PostTypeId'] == 1][['Id', 'Title','Body']]

Q_df.head()

Unnamed: 0,Id,Title,Body
0,1,Eliciting priors from experts,How should I elicit prior distributions from e...
1,2,What is normality?,In many different statistical methods there is...
2,3,What are some valuable Statistical Analysis op...,What are some valuable Statistical Analysis op...
3,4,Assessing the significance of differences in d...,I have two groups of data. Each with a differ...
5,6,The Two Cultures: statistics vs. machine learn...,"Last year, I read a blog post from Brendan O'C..."


### Answer posts:

In [18]:
A_df =  post_df.loc[post_df['PostTypeId'] != 1][['Id', 'Body']]
A_df.head()

Unnamed: 0,Id,Body
4,5,The R-project\n\nhttp://www.r-project.org/\n\n...
8,9,"Incanter is a Clojure-based, R-like platform (..."
11,12,"See my response to ""Datasets for Running Stati..."
12,13,Machine Learning seems to have its basis in th...
13,14,I second that Jay. Why is R valuable? Here's a...


### original post data

In [19]:
original_q_posts = Q_df['Body']
original_a_posts = A_df['Body']

original_q_titles = Q_df['Title']


print(original_q_posts[0:10])
print(original_a_posts[0:10])

print(original_q_titles[0:10])

0     How should I elicit prior distributions from e...
1     In many different statistical methods there is...
2     What are some valuable Statistical Analysis op...
3     I have two groups of data.  Each with a differ...
5     Last year, I read a blog post from Brendan O'C...
6     I've been working on a new method for analyzin...
7     Sorry, but the emptyness was a bit overwhelmin...
9     Many studies in the social sciences use Likert...
10    Is there a good, modern treatment covering the...
16    I have four competing models which I use to pr...
Name: Body, dtype: object
4     The R-project\n\nhttp://www.r-project.org/\n\n...
8     Incanter is a Clojure-based, R-like platform (...
11    See my response to "Datasets for Running Stati...
12    Machine Learning seems to have its basis in th...
13    I second that Jay. Why is R valuable? Here's a...
14    John Cook gives some interesting recommendatio...
15    Two projects spring to mind:\n\n\nBugs - takin...
17    Also see the UCI

In [20]:
original_q_titles.shape

(108954,)

In [21]:
thefile = open('./data/questions_body.txt', 'w')
for post in original_q_posts:
    post = str(post).replace('\n', ' ')
    thefile.write("%s\n" % post)
thefile = open('./data/answers_body.txt', 'w')
for post in original_a_posts:
    post = str(post).replace('\n', ' ')
    thefile.write("%s\n" % post)


In [22]:
len(original_q_titles)

108954

In [35]:
thefile = open('./data/questions_title.txt', 'w')

for i, post in enumerate(original_q_titles):
    post = "*"+ str(i+1) + str(post).replace('\n', "") 
    thefile.write("%s\n" % post)
    
    

In [36]:
i

108953

### data preprocessing


In [150]:
def show_work_status(singleCount, totalCount, currentCount = 0):
    currentCount += singleCount
    percentage = currentCount/totalCount *100
    status = ">" * int(percentage) +  " " * (100-int(percentage))
    sys.stdout.write('\rStatus:[{0}] {1:.2f}%'.format(status, percentage))
    sys.stdout.flush()
    if percentage >= 100:
    	print('\n')

In [24]:
# lemmatizer = nltk.WordNetLemmatizer()
# translation = str.maketrans(string.punctuation,' '*len(string.punctuation)) #define a replacement rule: replace all punctuation by " " 

# def preprocessing(line: str) -> str:
#     line = str(line).translate(translation)
#     line = nltk.word_tokenize(line.lower())
#     line = [lemmatizer.lemmatize(t) for t in line]
#     return " ".join(line)
    

In [25]:
def read_corpus(fname, tokens_only=False):
    with smart_open.smart_open(fname, encoding="iso-8859-1") as f:
        for i, line in enumerate(f):
            if tokens_only:
                
                yield gensim.utils.simple_preprocess(line)
                #This lowercases, tokenizes, de-accents (optional). – the output are final tokens = unicode strings, that won’t be processed any further.
            else:
                # For training data, add tags
                yield gensim.models.doc2vec.TaggedDocument(gensim.utils.simple_preprocess(line), [i])


In [26]:
gensim.utils.simple_preprocess("It is a sunny day.") # remove stop words, lower cases, tokenized

['it', 'is', 'sunny', 'day']

In [37]:
# It takes some time.
fname = './data/'


word2vec_question_title_corpus = list(read_corpus(fname + "questions_title.txt", tokens_only = True ))

In [38]:
len(word2vec_question_title_corpus)

108954

In [40]:
word2vec_question_corpus = list(read_corpus(fname + "questions_body.txt", tokens_only = True ))
word2vec_answer_corpus = list(read_corpus(fname + "answers_body.txt", tokens_only = True ))

In [41]:
print(word2vec_question_corpus[0:3])
print(word2vec_answer_corpus[0:3])
print(word2vec_question_title_corpus[0:3])

print(len(word2vec_question_corpus))
print(len(word2vec_answer_corpus))
print(len(word2vec_question_title_corpus))

[['how', 'should', 'elicit', 'prior', 'distributions', 'from', 'experts', 'when', 'fitting', 'bayesian', 'model'], ['in', 'many', 'different', 'statistical', 'methods', 'there', 'is', 'an', 'assumption', 'of', 'normality', 'what', 'is', 'normality', 'and', 'how', 'do', 'know', 'if', 'there', 'is', 'normality'], ['what', 'are', 'some', 'valuable', 'statistical', 'analysis', 'open', 'source', 'projects', 'available', 'right', 'now', 'edit', 'as', 'pointed', 'out', 'by', 'sharpie', 'valuable', 'could', 'mean', 'helping', 'you', 'get', 'things', 'done', 'faster', 'or', 'more', 'cheaply']]
[['the', 'project', 'http', 'www', 'project', 'org', 'is', 'valuable', 'and', 'significant', 'because', 'it', 'was', 'the', 'first', 'widely', 'accepted', 'open', 'source', 'alternative', 'to', 'big', 'box', 'packages', 'it', 'mature', 'well', 'supported', 'and', 'standard', 'within', 'many', 'scientific', 'communities', 'some', 'reasons', 'why', 'it', 'is', 'useful', 'and', 'valuable', 'there', 'are', 's

In [42]:
print(Q_df.shape)
print(A_df.shape)

(108954, 3)
(107008, 2)


In [209]:
# It takes some time.
doc2vec_question_corpus = list(read_corpus(fname + "questions_body.txt" ))
doc2vec_answer_corpus = list(read_corpus(fname + "answers_body.txt" ))


In [210]:
doc2vec_question_title_corpus = list(read_corpus(fname + "questions_title.txt" ))

In [211]:
print(doc2vec_question_corpus[0:3])
print(doc2vec_answer_corpus[0:3])
print(doc2vec_question_title_corpus[0:3])
print(len(doc2vec_question_corpus))
print(len(doc2vec_answer_corpus))
print(len(doc2vec_question_title_corpus))

[TaggedDocument(words=['how', 'should', 'elicit', 'prior', 'distributions', 'from', 'experts', 'when', 'fitting', 'bayesian', 'model'], tags=[0]), TaggedDocument(words=['in', 'many', 'different', 'statistical', 'methods', 'there', 'is', 'an', 'assumption', 'of', 'normality', 'what', 'is', 'normality', 'and', 'how', 'do', 'know', 'if', 'there', 'is', 'normality'], tags=[1]), TaggedDocument(words=['what', 'are', 'some', 'valuable', 'statistical', 'analysis', 'open', 'source', 'projects', 'available', 'right', 'now', 'edit', 'as', 'pointed', 'out', 'by', 'sharpie', 'valuable', 'could', 'mean', 'helping', 'you', 'get', 'things', 'done', 'faster', 'or', 'more', 'cheaply'], tags=[2])]
[TaggedDocument(words=['the', 'project', 'http', 'www', 'project', 'org', 'is', 'valuable', 'and', 'significant', 'because', 'it', 'was', 'the', 'first', 'widely', 'accepted', 'open', 'source', 'alternative', 'to', 'big', 'box', 'packages', 'it', 'mature', 'well', 'supported', 'and', 'standard', 'within', 'many

** How to use the titles? Independently or merge into the body? **

# Parameters for word2vec and doc2vec models:

In [167]:
sizes = np.linspace(20, 200, num=4)
print(sizes)

windows = np.linspace(1,4, num =2 )
print(windows)

min_counts = [1]
print(min_counts )

[  20.   80.  140.  200.]
[ 1.  4.]
[1]


# Word2Vec Model

In [None]:
# Read from the config file. 
# paras = json.loads(open('./configure/word2vec.json').read())
# print(paras)

# min_count = paras['min_count']
# size = paras['size']
# window = paras['window']
# workers = paras['workers']
# sg = paras['sg']
# alpha = paras["alpha"]
# hs = paras["hs"]
# negative = paras["negative"]


In [205]:
def train_word_model(sentences, **paras):
    min_count = paras['min_count']
    size = paras['size']
    window = paras['window']
    model = gensim.models.Word2Vec(sentences, min_count = min_count, size = size, window = window, workers = 4)
    fname = 'word2vector_model' + str(int(size))+"_"+str(int(window))+"_"+str(int(min_count))
    model.save(fname)
    return model

def glance_word(model):
#     print('*********vector representation of \'statistics\'********')
#     print(model['statistics'])
#     print('\n')
    print(model)
    print('*************most similar words to vector***************')
    print(model.most_similar('vector'))
    print('\n')
    print("**************Similarity of \'probability\' and \'distribution\'******************")
    print(model.similarity('probability','distribution'))
    print('\n')
    print("**************Similarity of \'gaussian\' and \'normal\'******************")
    print(model.similarity('gaussian','normal'))
    print('\n')

def filter_corpus(sentences,model, doc = 0):
    vocab = list(model.wv.vocab.keys())
    print(vocab[0:10])
    filtered_sentences = []
    i = 0
    total_count = len(sentences)
    current_count = 0
    for sentence in sentences:
        i += 1
        if i % 1000 == 0:
            show_work_status(1000, total_count, current_count)
            current_count += 1000
        if doc == 0:
            words = list(filter(lambda x: x in vocab, sentence))
            #words = [word for word in words]
        if doc == 1:
            words = list(filter(lambda x: x in vocab, sentence.words))
            #words = [word for word in words]
        filtered_sentences.append(words)
    return filtered_sentences

def evaluate_word_model(model,topk, min_count):
    n =len(word2vec_question_corpus)
    random.seed(10)
    doc_id = random.randint(0,n)
    # pick up a question randomly and find similar questions.
    if min_count == 1:
        word2vec_question_corpus_filter = word2vec_question_corpus
    else:
        word2vec_question_corpus_filter = filter_corpus(word2vec_question_corpus, model) 
    sen_interest = word2vec_question_corpus_filter[doc_id]
    print('filtering is done.')
    sims = []
    current_n = 0
    for i in range(int(len(Q_df['Body'])/1000)):
#         if i % 500 == 0:
#             show_work_status(500,int(len(Q_df['Body'])/1000),current_n)
        compared_sen = word2vec_question_corpus_filter[i]
        sim = model.wv.n_similarity(sen_interest, compared_sen)
        sims.append(sim)
    most_similar_index = heapq.nlargest(topk, range(len(sims)), key=sims.__getitem__)
    print('*****************The question we are interested in is: ************************')
    print(Q_df.iloc[doc_id]['Body'])
    print("*******************Similar questions are **********************")
    for i in most_similar_index :
        print(i)
        print('similarity is ', sims[i])
        print(Q_df.iloc[i]['Body'])
        print("**********************")
    return sims, doc_id

### training  models.

In [187]:
# Tuning parameters on grids
models = {}
for size in sizes:
    for window in windows:
        for min_count in min_counts:
            paras =  {'size':int(size), 'window' : int(window), "min_count" :int(min_count)}
            model_name =   str(int(size))+"_"+str(int(window))+"_"+str(int(min_count))
            models[model_name] = train_word_model(word2vec_question_corpus, **paras)

In [204]:
for key in models.keys():
    print(key)
    glance_word(models[key])
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')

title_200_1_1
Word2Vec(vocab=114657, size=200, alpha=0.025)
*************most similar words to vector***************
[('vectors', 0.779895007610321), ('sequence', 0.5380569696426392), ('scalar', 0.5291681885719299), ('numtoswitch', 0.5225784778594971), ('boldsymbol', 0.5199030637741089), ('boltzmann', 0.5158196687698364), ('mathbf', 0.5153965950012207), ('knowledgecenter', 0.5143557786941528), ('tuple', 0.5135056376457214), ('bf', 0.5073824524879456)]


**************Similarity of 'probability' and 'distribution'******************
0.45112888745


**************Similarity of 'gaussian' and 'normal'******************
0.681192582025


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
title_140_4_1
Word2Vec(vocab=114657, size=140, alpha=0.025)
*************most similar words to vector***************
[('vectors', 0.7099881172180176), ('scalar', 0.6378396153450012), ('matrix', 0.6202725172042847), ('element', 0.5627481937408447), ('mathbf', 0.55721670389175

In [201]:
for key in models.keys():
    print(key)
    evaluate_word_model(models[key], 3, 1)
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')

title_200_1_1
filtering is done.
*****************The question we are interested in is: ************************
Let $F:(-\infty,\infty)\rightarrow[0,1]$ and $G:(-\infty,\infty)\rightarrow[0,1]$ be two CDFs with PDFs $f$ and $g$, respectively. Is there a connection/inequality between:

$$d_1 = \int_{-\infty}^{\infty}\vert f(t) - g(t) \vert dt,$$
and
$$d_2 = \int_{-\infty}^{\infty}\vert F(t) - G(t) \vert dt?$$
Assuming $d_2$ exists ($d_1$ is always finite).

*******************Similar questions are **********************
51
similarity is  0.846244543109
I know this must be standard material, but I had difficulty in finding a proof in this form.

Let $e$ be a standard white Gaussian vector of size $N$.  Let all the other matrices in the following be constant.

Let $v = Xy + e$, where $X$ is an $N\times L$ matrix and $y$ is an $N\times 1$ vector, and let

$$\left\{\begin{align}
\bar y &amp;= (X^TX)^{-1}X^Tv\\
\bar e &amp;= v - X\bar y
\end{align}\right.\quad.$$

If $c$ is any constant vec

# doc2vec mdoel

In [220]:
def train_doc_model(sentences, **paras):
    min_count = paras['min_count']
    size = paras['size']
    window = paras['window']
    doc2vec_model = gensim.models.doc2vec.Doc2Vec(
                min_count = min_count, 
                 size =size, 
                 window =window
    )
    doc2vec_model.build_vocab(sentences)
    
    fname = 'doc2vector_model' + str(int(size))+"_"+str(int(window))+"_"+str(int(min_count))
    doc2vec_model.save(fname)
    return doc2vec_model



In [244]:
# def glance_doc(model):
#     print(model)
#     print('*************most similar words to vector***************')
#     print(model.docvecs.most_similar(0))
#     print('\n')
#     print("**************Similarity of \'probability\' and \'distribution\'******************")
#     print(model.docvecs.similarity('probability','distribution'))
#     print('\n')
#     print("**************Similarity of \'gaussian\' and \'normal\'******************")
#     print(model.docvecs.similarity('gaussian','normal'))
#     print('\n')





In [256]:
def evaluate_doc_model(doc2vec_model,topk, min_count):
    n =len(doc2vec_question_corpus)
    random.seed(0)
    doc_id = random.randint(0,n)
    
    inferred_vector = doc2vec_model.infer_vector(doc2vec_question_corpus[doc_id].words)
    sims = doc2vec_model.docvecs.most_similar([inferred_vector],  topn= len(doc2vec_question_corpus))
    
    most_similar_index = sims[0:topk]
    print('*****************The question we are interested in is: ************************')
    print(Q_df.iloc[doc_id]['Body'])
    print("*******************Similar questions are **********************")
    for i in most_similar_index :
        
        print('similarity is ', i)
        print(Q_df.iloc[i[0]]['Body'])
        print("**********************")
    return sims, doc_id

Each object represents a single sentence, and consists of two simple lists: a list of words and a list of labels.

## train the model

In [262]:
# Tuning parameters on grids
doc_models = {}
for size in sizes:
    for window in windows:
        for min_count in min_counts:
            paras =  {'size':int(size), 'window' : int(window), "min_count" :int(min_count)}
            model_name = 'doc_'+  str(int(size))+"_"+str(int(window))+"_"+str(int(min_count))
            doc_models[model_name] = train_doc_model(doc2vec_question_corpus, **paras)

In [265]:
doc_models.keys()

dict_keys(['doc80_4_1', 'doc20_1_1', 'doc140_1_1', 'doc140_4_1', 'doc20_4_1', 'doc200_1_1', 'doc80_1_1', 'doc200_4_1'])

In [268]:
for key in doc_models.keys():
    print(key)
    evaluate_doc_model(doc_models[key],3, 1)

doc80_4_1
*****************The question we are interested in is: ************************
I would be very thankful if somebody could help me and explain the answer in simple terms.

I have y, x, z variables. They are countinuous, no missing values. y is dependent variable, x and z independent. There is significant correlation between them (varies from 0.25 to 0.35).

My hypothesis is that when z is very high, then x and y dependency is linear (x goes bigger, y smaller).
When z is very low, then in the beginning the situation is the same (dependency is linear), but in the end (about last quarter) it changes to quadratic.

So basically there are two different relationships between x and y, but the nature of the relationship depends on z which is again a continuous variable. 

How can I test this model? Is it possible doing linear regression or do I need to use something else?

*******************Similar questions are **********************
similarity is  (15463, 0.47558698058128357)
Coul