In [2]:
# Install missing packages to kernel.

! pip install pandas
! pip install scikit-learn
! pip install numpy



In [3]:
# Import packages.

import pandas as pd 
import numpy as np
import scipy.sparse as sp
pd.options.display.max_colwidth = None
pd.set_option('display.max_rows', 10)
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import normalize
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity

# CSDS-435: Project #2
# Adam Brennan and Charlie Song

## Helper Functions...

In [4]:
def get_cnn_health_tweets_dataframe():
    # read in the cnnhealth.txt file using utf-8 encoding
    with open('cnnhealth.txt', 'r', encoding='utf-8') as cnn_health_tweets_dataset:
        # initialize empty list to store tweets
        tweets = []

        # for each row in the dataset, skip the first two columns and append the tweet to the tweets list
        for row in cnn_health_tweets_dataset:
            columns = row.strip().split('|')
            tweet = '|'.join(columns[2:])
            tweets.append([tweet])

    # convert list of tweets to pandas DataFrame
    df = pd.DataFrame(tweets, columns=['tweet'])
    
    # return the resulting pandas DataFrame
    return df

In [5]:
def get_bag_of_words_matrix(tweets_list):
    # initialize CountVectorizer opbject 
    vectorizer = CountVectorizer()

    # build bag of words matrix from tweets list
    bag_of_words_matrix = vectorizer.fit_transform(tweets_list)
    
    return bag_of_words_matrix

In [6]:
def get_bag_of_words_dataframe(tweets_list):
    # initialize CountVectorizer opbject 
    vectorizer = CountVectorizer()

    # build bag of words matrix from tweets list
    bag_of_words_matrix = vectorizer.fit_transform(tweets_list)

    # convert bag of words matrix to a pandas DataFrame
    bag_of_words_df = pd.DataFrame(bag_of_words_matrix.toarray(), columns = vectorizer.get_feature_names_out())
    
    return bag_of_words_df

In [7]:
def get_tweet(tweets_df, tweet_index):
    # return the tweet at the specified index in string form
    return str(tweets_df.iloc[tweet_index])

In [8]:
def get_num_total_tokens(tweets_list):
    num_total_tokens = 0

    for tweet in tweets_list:
        num_total_tokens += len(tweet)

    return num_total_tokens

In [9]:
def get_tweets_summary_dataframe(tweets_list):
    num_tweets = len(tweets_list)
    num_total_tokens = get_num_total_tokens(tweets_list)
    num_unique_tokens = get_bag_of_words_dataframe(tweets_list).shape[1]
    num_avg_tokens_per_tweet = num_total_tokens / num_tweets

    data = [[num_tweets, num_total_tokens, num_unique_tokens, num_avg_tokens_per_tweet, 'CNN Health Tweets']]

    columns = ['# Tweets', '# Total Tokens', '# Unique Tokens', 'Avg. Tokens Per Tweet', 'Description']

    tweets_summary_dataframe = pd.DataFrame(data, columns=columns)
    
    return tweets_summary_dataframe

In [10]:
def get_euclidean_distance(bag_of_words_matrix):
    bag_of_words_matrix_ed = euclidean_distances(bag_of_words_matrix)

    np.savetxt("euclidean_distance.txt", bag_of_words_matrix_ed, delimiter=',', fmt='%.2f')

    return bag_of_words_matrix_ed

In [11]:
def get_cosine_similarity(bag_of_words_matrix):
    bag_of_words_matrix_cs = cosine_similarity(bag_of_words_matrix)

    np.savetxt("cosine_similarity.txt", bag_of_words_matrix_cs, delimiter=',', fmt='%.2f')

    return bag_of_words_matrix_cs

### Reading in the "cnnhealth.txt" file...

In [12]:
# get pandas DataFrame
tweets_df = get_cnn_health_tweets_dataframe()

# display DataFrame
tweets_df

Unnamed: 0,tweet
0,"An abundance of online info can turn us into e-hypochondriacs. Or, worse, lead us to neglect getting the care we need http://cnn.it/1L1t1Fv"
1,A plant-based diet that incorporates fish may be the key to preventing colorectal cancers: http://cnn.it/1xdpsjT http://pbs.twimg.com/media/CAARHEGWEAAJGz6.jpg
2,It doesn't take much to damage your hearing at a sports bar or nightclub. That's why a billion people are at risk. http://cnn.it/1BOphBk
3,RT @CNN: Forever young? Discover this island’s secrets to longevity on #TheWonderList w/ @BillWeirCNN http://cnn.it/1GNdmqc https://t.co/…
4,RT @CNN: Is post-traumatic stress disorder in your genes? A simple blood test may one day help tell you http://cnn.it/1xls8w5 http://t.co/…
...,...
4056,RT @EverydayHealth: Want killer abs? @JillianMichaels shows you how get them: http://at.cnn.com/ubAkAN
4057,Medicare at stake -- @sanjayguptaCNN talks about politicians' plans http://at.cnn.com/7nLSOm7
4058,Ann Romney talks about her experience with MS http://at.cnn.com/khb6keC
4059,Make sure your first marathon isn't your last! http://at.cnn.com/liehiPl


### Get tweet as list...

In [13]:
# get list of tweets
tweets_list = tweets_df['tweet'].tolist()

### Get the "bag of words" pandas DataFrame using the previously generated tweets pandas DataFrame...

In [14]:
# generate "bag of words" pandas DataFrame
bag_of_words_dataframe = get_bag_of_words_dataframe(tweets_list)

# display "bag of words" pandas DataFrame
bag_of_words_dataframe

Unnamed: 0,000,008relo,00fvvdw,00igyua,01,01tdt1o,02,03,04,05,...,zwwtu7,zxodiqo,zy53fec,zy7u11,zyam2xi,zyl,zyt5bpe,zzetat,zzgrrgf,zzzzz
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4056,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4057,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4058,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4059,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Get the tweets summary DataFrame...

In [15]:
# generate "bag of words" pandas DataFrame
tweets_summary_dataframe = get_tweets_summary_dataframe(tweets_list)

tweets_summary_dataframe

Unnamed: 0,# Tweets,# Total Tokens,# Unique Tokens,Avg. Tokens Per Tweet,Description
0,4061,441036,11264,108.602807,CNN Health Tweets


### Distance measure #1: Eucliean distance

In [16]:
bag_of_words_matrix = get_bag_of_words_matrix(tweets_list)

bag_of_words_matrix_ed = get_euclidean_distance(bag_of_words_matrix)

bag_of_words_matrix_ed

array([[0.        , 6.4807407 , 6.55743852, ..., 6.        , 6.164414  ,
        5.65685425],
       [6.4807407 , 0.        , 6.40312424, ..., 5.65685425, 5.83095189,
        5.29150262],
       [6.55743852, 6.40312424, 0.        , ..., 5.74456265, 5.56776436,
        5.38516481],
       ...,
       [6.        , 5.65685425, 5.74456265, ..., 0.        , 4.47213595,
        3.74165739],
       [6.164414  , 5.83095189, 5.56776436, ..., 4.47213595, 0.        ,
        4.        ],
       [5.65685425, 5.29150262, 5.38516481, ..., 3.74165739, 4.        ,
        0.        ]])

### Distance measure #2: Cosine similarity

In [17]:
bag_of_words_matrix = get_bag_of_words_matrix(tweets_list)

bag_of_words_matrix_cs = get_cosine_similarity(bag_of_words_matrix)

bag_of_words_matrix_cs

array([[1.        , 0.22222222, 0.21821789, ..., 0.1067521 , 0.0993808 ,
        0.12830006],
       [0.22222222, 1.        , 0.25458754, ..., 0.21350421, 0.1987616 ,
        0.25660012],
       [0.21821789, 0.25458754, 1.        , ..., 0.20965697, 0.29277002,
        0.25197632],
       ...,
       [0.1067521 , 0.21350421, 0.20965697, ..., 1.        , 0.28644595,
        0.36980013],
       [0.0993808 , 0.1987616 , 0.29277002, ..., 0.28644595, 1.        ,
        0.34426519],
       [0.12830006, 0.25660012, 0.25197632, ..., 0.36980013, 0.34426519,
        1.        ]])

In [18]:
pd.set_option('display.max_rows', None)


### Clustering using KMeans...

#### First cluster with euclidean distance as the distance measure...

In [19]:
bow = get_bag_of_words_matrix(tweets_list)

kmeans = KMeans(n_clusters = 20, random_state = 0)
kmeans.fit(bow)

labels = kmeans.labels_
labels

  super()._check_params_vs_input(X, default_n_init=10)


array([14, 16, 17, ...,  2, 17, 18])

In [20]:
df = pd.DataFrame({
    'tweets': tweets_list,
    'label': labels
})

df[df['label'] == 1]

Unnamed: 0,tweets,label
999,Starting soon: Allergies getting you down? Join our #CNNAllergies Twitter chat at 1:30pm EST with @AllergyReliefNY http://on.fb.me/1k9W1v8,1
1003,TODAY: Join our #CNNAllergies Twitter chat at 1:30pm EST with @AllergyReliefNY and ask all of your allergy questions http://on.fb.me/1k9W1v8,1
1005,TODAY: Allergies getting you down? Join our #CNNAllergies Twitter chat at 1:30pm EST with @AllergyReliefNY http://on.fb.me/1k9W1v8,1
1009,Questions about #allergies? Ask us using hashtag #CNNAllergies. Chat is TOMORROW at 1:30pm EST with @AllergyReliefNY http://on.fb.me/1k9W1v8,1
1011,Allergies getting you down? Join our #CNNAllergies chat TOMORROW at 1:30pm EST with @AllergyReliefNY http://on.fb.me/1k9W1v8,1
1013,Join our #CNNAllergies chat TOMORROW at 1:30pm EST with @AllergyReliefNY http://on.fb.me/1k9W1v8,1
1016,Learn about #allergies tomorrow! Send us your questions before our #CNNAllergies chat on 4/23 @ 1:30 pm ET http://on.fb.me/1k9W1v8,1
1023,What can you do to get allergy relief? Join our #CNNAllergies chat on 4/23 at 1:30 p.m. EST with @AllergyReliefNY http://on.fb.me/1k9W1v8,1
1026,Allergies getting you down? Join our #CNNAllergies chat on 4/23 at 1:30 p.m. EST with @AllergyReliefNY http://on.fb.me/1k9W1v8,1
1027,Allergies getting you down? Join our #CNNAllergies chat on 4/23 at 1:30 p.m. EST with @AllergyReliefNY http://on.fb.me/1k9W1v8,1


#### First cluster with cosine similarity as the distance measure...

In [21]:
bow = get_bag_of_words_matrix(tweets_list)
normalized_bow = normalize(bow, norm='l2', axis=1)

kmeans = KMeans(n_clusters = 30, random_state = 0)
kmeans.fit(normalized_bow)

labels = kmeans.labels_
labels

  super()._check_params_vs_input(X, default_n_init=10)


array([21,  6,  2, ..., 15, 28, 15])

In [22]:
df = pd.DataFrame({
    'tweets': tweets_list,
    'label': labels
})

df[df['label'] == 0]

Unnamed: 0,tweets,label
1907,. @Schwarzenegger trained 1 hr/day to be in shape for @EscapePlanMovie. His tips for working out on the road http://www.cnn.com/2013/10/18/health/arnold-schwarzenegger-travel-workout/index.html,0
1936,Mentally ill #Latinos struggle to get help http://www.cnn.com/2013/10/09/health/latino-mental-health-disparities/index.html #LATISM @UN @WHO #WMHD,0
1945,"Fat-burning supplement pulled from shelves after it's linked to liver failure, acute hepatitis http://www.cnn.com/2013/10/09/health/oxyelite-pro-liver-damage/index.html @HIgov_Health",0
1946,#Latinos struggle to find help for #mental #health issues http://www.cnn.com/2013/10/09/health/latino-mental-health-disparities/index.html,0
1969,Were you or your child supposed to start a clinical trial that may be delayed b/c of the #shutdown? Let us know http://www.cnn.com/2013/10/01/health/shutdown-nih-clinical-trials/index.html,0
1975,Why #beer is good for your health http://www.cnn.com/2013/09/30/health/beer-good-health/,0
1976,"It's news Homer Simpson will love: Beer, it's good for you. The health benefits of a good cold one: http://www.cnn.com/2013/09/30/health/beer-good-health/?hpt=he_t3",0
1977,An ireporter lost 158 pounds in part by leaving a photographic reminder of just how heavy he was everywhere he looked http://www.cnn.com/2013/09/30/health/irpt-weight-loss-trotter/index.html?hpt=he_c1,0
1978,Check out these 15 superfoods for fall! http://www.cnn.com/2013/09/27/health/gallery/best-fall-superfoods/index.html,0
1979,"Hip hop health, a fun &amp; effective health program that helps kids lose weight &amp; helped one kid save his grandma's life http://www.cnn.com/2013/09/27/health/hip-hop-health/index.html",0


### Clustering using Hierarhical Clustering...

#### First cluster with euclidean distance as the distance measure...

In [23]:
bow = get_bag_of_words_matrix(tweets_list)
normalized_bow = normalize(bow, norm='l2', axis=1)
dense_bow = bow.todense()
bow_array = np.array(dense_bow)

hierarchica_clustering = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='ward')
hierarchica_clustering.fit(bow_array)

labels = hierarchica_clustering.labels_
labels



array([1, 3, 4, ..., 1, 1, 1], dtype=int64)

In [24]:
df = pd.DataFrame({
    'tweets': tweets_list,
    'label': labels
})

df[df['label'] == 4]

Unnamed: 0,tweets,label
2,It doesn't take much to damage your hearing at a sports bar or nightclub. That's why a billion people are at risk. http://cnn.it/1BOphBk,4
3,RT @CNN: Forever young? Discover this island’s secrets to longevity on #TheWonderList w/ @BillWeirCNN http://cnn.it/1GNdmqc https://t.co/…,4
4,RT @CNN: Is post-traumatic stress disorder in your genes? A simple blood test may one day help tell you http://cnn.it/1xls8w5 http://t.co/…,4
7,"RT @CNNOpinion: Women can defeat #Alzheimers, says @mariashriver. #WipeOutAlz challenge will make it happen. http://cnn.it/1AklXJQ http://…",4
9,CDC: Misuse of garments may have led to release of bioterror bacteria at Tulane monkey lab. http://cnn.it/18HDKDg,4
17,RT @cnnbrk: U.S. Ebola patient headed to National Institutes of Health. http://cnn.it/1BxKgIa.,4
22,Kids = constant ear infections. Save yourself a trip to the ER w/ this: http://cnn.it/1xg6t8x #cnninstantstartups,4
26,Could a simple blood test someday tell if you're genetically predisposed to PTSD? http://cnn.it/1Mvai0w,4
27,RT @CNN: Explore the secrets of Ikaria on #TheWonderList with @BillWeirCNN Sun at 10pET/PT. http://cnn.it/1EfbKSL https://vine.co/v/O9nAHUHOnK2,4
28,Ear trouble? Save yourself a trip to the ER. One startup has created an iPhone attachment to diagnose ear infections. http://cnn.it/1GIzkux,4


#### First cluster with cosine similarity as the distance measure...

In [25]:
bow = get_bag_of_words_matrix(tweets_list)
normalized_bow = normalize(bow, norm='l2', axis=1)
dense_bow = normalized_bow.todense()
bow_array = np.array(dense_bow)

hierarchica_clustering = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='ward')
hierarchica_clustering.fit(bow_array)

labels = hierarchica_clustering.labels_
labels



array([4, 2, 1, ..., 1, 1, 1], dtype=int64)

In [26]:
df = pd.DataFrame({
    'tweets': tweets_list,
    'label': labels
})

df[df['label'] == 3]

Unnamed: 0,tweets,label
114,Whitney Houston's daughter still on ventilator http://www.cnn.com/2015/02/02/entertainment/whitney-houston-daughter-hospitalized/index.html,3
117,Fit Nation is back: Meet the new team http://www.cnn.com/2015/01/28/health/feat-fit-nation-is-back/index.html,3
120,Details emerge about Joan Rivers death after lawsuit filed from @jechristensen http://www.cnn.com/2015/01/26/health/joan-rivers-lawsuit/ http://pbs.twimg.com/media/B8VAgxHCYAETD6L.jpg,3
122,"The world is one step closer to having a real weapon to fight Ebola, from @jechristensen http://www.cnn.com/2015/01/23/health/feat-ebola-vaccine-latest/index.html http://pbs.twimg.com/media/B8CvTxaIgAAEvrB.jpg",3
128,The flu is back at an epidemic level http://www.cnn.com/2015/01/16/health/new-flu-numbers/index.html http://pbs.twimg.com/media/B7gO-9eCMAIEmQ3.jpg,3
131,Heroin deaths are up for a third year in a row http://www.cnn.com/2015/01/14/health/heroin-deaths-increase/index.html http://pbs.twimg.com/media/B7Z0w7sCYAAM-Jq.jpg,3
132,"What are ""natural flavors,"" really? @AmandaWoerner http://www.cnn.com/2015/01/14/health/feat-natural-flavors-explained/index.html http://pbs.twimg.com/media/B7VZavoIIAAky2s.jpg",3
133,Do any of your resolutions involve eating better? Here's some easy ways to do just that http://www.cnn.com/2015/01/14/health/feat-healthy-eating-habits/index.html http://pbs.twimg.com/media/B7VRs4CCcAAW-LX.jpg,3
145,The flu is coming to a state near you (if it isn't there already) http://www.cnn.com/2015/01/05/health/flu-activitity-spreads/index.html http://pbs.twimg.com/media/B6nz0vYCIAAExRX.jpg,3
147,Caramel apples thought to be linked to deadly listeria outbreak in 10 states @jechristensen http://www.cnn.com/2014/12/19/health/caramel-apple-listeria/index.html?hpt=he_c2 http://pbs.twimg.com/media/B5Pfk_mIgAAHQBT.jpg,3


### Clustering using DBSCAN...

#### First cluster with euclidean distance as the distance measure...

In [27]:
bow = get_bag_of_words_matrix(tweets_list)

cluster = DBSCAN(eps=3, min_samples=2).fit(bow)

labels = cluster.labels_
labels

array([-1, -1, -1, ..., -1, -1, 10], dtype=int64)

In [28]:
df = pd.DataFrame({
    'tweets': tweets_list,
    'label': labels
})

df[df['label'] == 1]

Unnamed: 0,tweets,label
197,#StrokeChat #WorldStrokeDay http://pbs.twimg.com/media/B1IVu2QCAAAM8or.jpg,1
203,Are you at risk? Join us for a #WorldStrokeDay #StrokeChat w/ @American_Stroke at 1 p.m. ET http://pbs.twimg.com/media/B1G_rDaCAAANet9.jpg,1
204,Join us for a #WorldStrokeDay #StrokeChat w/ @American_Stroke at 1 p.m. ET http://pbs.twimg.com/media/B1G_aLSCAAAeyBU.jpg,1
777,"RT @DarianMurray: @cnnhealth oatmeal. Raisins, walnuts. http://pbs.twimg.com/media/BpNETvqIQAAWzsn.jpg",1
778,"RT @DDworkouts: @cnnhealth granola, yogurt, strawberries. Fancy. http://pbs.twimg.com/media/BpNRZhfCYAAc1xT.jpg",1
1175,"RT @UMN_Health: @cnnhealth @WilliamCNN We do, too! http://pbs.twimg.com/media/BjqbzykCEAAU7Sc.jpg",1
1176,RT @MMGHealth: @cnnhealth @WilliamCNN We hear you! http://pbs.twimg.com/media/BjqOwO2CAAA4Mtx.jpg,1
1185,Mmmm! RT @AtarheHoover: @cnnhealth http://pbs.twimg.com/media/BjkodIgCYAEghBa.jpg,1
1186,RT @apelsinkaX: @cnnhealth http://pbs.twimg.com/media/BjkiAskIYAA4r7G.jpg,1
2851,RT @Taversha: @cnnhealth A cup ring of pollen! http://pbs.twimg.com/media/BHlpN0cCQAE8Ez2.jpg,1


#### First cluster with cosine similarity as the distance measure...

In [29]:
bow = get_bag_of_words_matrix(tweets_list)
normalized_bow = normalize(bow, norm='l2', axis=1)

cluster = DBSCAN(eps=3, min_samples=2).fit(normalized_bow)

labels = cluster.labels_
labels

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [30]:
df = pd.DataFrame({
    'tweets': tweets_list,
    'label': labels
})

df[df['label'] == 1]

Unnamed: 0,tweets,label


### Clustering using EM Clustering...

#### First cluster with euclidean distance as the distance measure...

In [35]:
bow = get_bag_of_words_matrix(tweets_list)
normalized_bow = normalize(bow, norm='l2', axis=1)
dense_bow = bow.todense()
bow_array = np.asarray(dense_bow)

cluster = GaussianMixture(n_components=3)
cluster.fit(bow_array)

labels = cluster.predict(bow_array)
labels

array([1, 0, 1, ..., 2, 2, 2], dtype=int64)

In [36]:
df = pd.DataFrame({
    'tweets': tweets_list,
    'label': labels
})

df[df['label'] == 1]

Unnamed: 0,tweets,label
0,"An abundance of online info can turn us into e-hypochondriacs. Or, worse, lead us to neglect getting the care we need http://cnn.it/1L1t1Fv",1
2,It doesn't take much to damage your hearing at a sports bar or nightclub. That's why a billion people are at risk. http://cnn.it/1BOphBk,1
3,RT @CNN: Forever young? Discover this island’s secrets to longevity on #TheWonderList w/ @BillWeirCNN http://cnn.it/1GNdmqc https://t.co/…,1
9,CDC: Misuse of garments may have led to release of bioterror bacteria at Tulane monkey lab. http://cnn.it/18HDKDg,1
12,RT @CNN: A plant-based diet that incorporates fish may be the key to preventing colorectal cancers: http://cnn.it/1x1ffMU http://t.co/OIu6…,1
17,RT @cnnbrk: U.S. Ebola patient headed to National Institutes of Health. http://cnn.it/1BxKgIa.,1
22,Kids = constant ear infections. Save yourself a trip to the ER w/ this: http://cnn.it/1xg6t8x #cnninstantstartups,1
26,Could a simple blood test someday tell if you're genetically predisposed to PTSD? http://cnn.it/1Mvai0w,1
28,Ear trouble? Save yourself a trip to the ER. One startup has created an iPhone attachment to diagnose ear infections. http://cnn.it/1GIzkux,1
31,A Tulane researcher has been exposed to deadly bioterror bacteria at a monkey lab. http://cnn.it/1AhvHnW,1


#### First cluster with cosine similarity as the distance measure...

In [38]:
bow = get_bag_of_words_matrix(tweets_list)
normalized_bow = normalize(bow, norm='l2', axis=1)
dense_bow = normalized_bow.todense()
bow_array = np.asarray(dense_bow)

cluster = GaussianMixture(n_components=3)
cluster.fit(bow_array)

labels = cluster.predict(bow_array)
labels

array([2, 2, 1, ..., 0, 0, 0], dtype=int64)

In [39]:
df = pd.DataFrame({
    'tweets': tweets_list,
    'label': labels
})

df[df['label'] == 2]

Unnamed: 0,tweets,label
0,"An abundance of online info can turn us into e-hypochondriacs. Or, worse, lead us to neglect getting the care we need http://cnn.it/1L1t1Fv",2
1,A plant-based diet that incorporates fish may be the key to preventing colorectal cancers: http://cnn.it/1xdpsjT http://pbs.twimg.com/media/CAARHEGWEAAJGz6.jpg,2
3,RT @CNN: Forever young? Discover this island’s secrets to longevity on #TheWonderList w/ @BillWeirCNN http://cnn.it/1GNdmqc https://t.co/…,2
4,RT @CNN: Is post-traumatic stress disorder in your genes? A simple blood test may one day help tell you http://cnn.it/1xls8w5 http://t.co/…,2
5,"Maysoon Zayid, a touring standup comic with Cerebral Palsy, has a message to share. http://cnn.it/1GNiH0L http://pbs.twimg.com/media/B__ubV_UQAAUNOO.jpg",2
6,"How women can wipe out Alzheimer's, from @mariashriver. http://cnn.it/1AklXJQ http://pbs.twimg.com/media/B__sQMiUUAAGMre.jpg",2
7,"RT @CNNOpinion: Women can defeat #Alzheimers, says @mariashriver. #WipeOutAlz challenge will make it happen. http://cnn.it/1AklXJQ http://…",2
8,Is it time to raise the legal smoking age? http://cnn.it/1GNhBC8 http://pbs.twimg.com/media/B__rgYfUwAALomW.jpg,2
9,CDC: Misuse of garments may have led to release of bioterror bacteria at Tulane monkey lab. http://cnn.it/18HDKDg,2
10,"Losing a brain tumor, gaining perspective: CNN's Jessica Moskowitz's #FirstPerson experience. http://cnn.it/1EhlrjJ http://pbs.twimg.com/media/B_-tiaAUgAArA_6.jpg",2
