Code to scrape all comments and main posts of Polymath projects.

# Preliminaries

In [None]:
# ! pip install requests beautifulsoup4

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from urllib.parse import urljoin
from google.colab import files

In [None]:
# Column names for the comment tables, in the intended display/export order
# (identifier and timing first, then parent, author, comment and post
# references, then the text). Not referenced by the functions visible in
# this part of the notebook.
column_order = [
    'id', 'time',
    'in-reply-to', 'in-reply-to-href',
    'author', 'author-href',
    'comment-id', 'comment-href',
    'post-id', 'content',
]

# Functions

In [None]:
###  PARSING  ###

def get_site_for_url(url):
    """Return the one-letter site code for ``url``.

    Each known host name is looked for inside ``url`` (plain substring
    match) in the order listed below; the first one found decides the code.
    A URL containing none of them gets ``'P'``.
    """
    host_codes = (
        ('en.wikipedia.org', 'W'),
        ('gowers.wordpress.com', 'G'),
        ('terrytao.wordpress.com', 'T'),
        ('gilkalai.wordpress.com', 'K'),
        ('dustingmixon.wordpress.com', 'D'),
        ('sbseminar.wordpress.com', 'M'),
    )
    for host, code in host_codes:
        if host in url:
            return code
    return 'P'

def parse_url_file(urls_path='urls.txt'):
    """Read a project/URL listing and group the URLs by project.

    The file content is split on whitespace. A token starting with
    ``POLYMATH`` or ``MINI`` opens a new project whose key is the token's
    first letter followed by every digit in the token (``POLYMATH12`` ->
    ``'P12'``). Every other token is treated as a URL of the most recently
    opened project. A repeated header starts that project's list afresh.

    Parameters
    ----------
    urls_path : str
        Path of the text file to read.

    Returns
    -------
    dict
        Maps each project key to a list of ``(site_code, url)`` tuples in
        file order, where ``site_code`` comes from ``get_site_for_url``.

    Raises
    ------
    ValueError
        If a URL appears before any project header.
    """
    # Imported locally: the notebook's import cell does not provide ``re``.
    import re

    with open(urls_path, 'r', encoding='utf-8') as file:
        # split() without arguments never produces empty tokens.
        tokens = file.read().split()

    project_urls_dict = {}
    curr = None
    for token in tokens:
        if token.startswith(('POLYMATH', 'MINI')):
            curr = token[0] + ''.join(re.findall(r'\d+', token))
            project_urls_dict[curr] = []
        elif curr is None:
            raise ValueError(
                f'URL {token!r} appears before any POLYMATH/MINI header '
                f'in {urls_path!r}'
            )
        else:
            project_urls_dict[curr].append((get_site_for_url(token), token))
    return project_urls_dict

def parse_urls(urls):
    """Convert a newline-separated block of URLs into ``(blog, url)`` pairs.

    Each non-blank line is stripped of surrounding whitespace and tagged
    with a one-letter blog code chosen by substring match on the host name:
    Gowers (G), Tao (T), Gil Kalai (K), dustingmixon (D), sbseminar (M);
    anything else is tagged P. Unlike ``get_site_for_url`` there is no
    separate code for Wikipedia here.

    Parameters
    ----------
    urls : str
        URLs, one per line. Blank lines are ignored.

    Returns
    -------
    list
        ``(blog, url)`` tuples in input order; empty if ``urls`` holds no
        URL.
    """
    blog_codes = (
        ('gowers.wordpress.com', 'G'),
        ('terrytao.wordpress.com', 'T'),
        ('gilkalai.wordpress.com', 'K'),
        ('dustingmixon.wordpress.com', 'D'),
        ('sbseminar.wordpress.com', 'M'),
    )
    blog_url_pairs = []
    for line in urls.splitlines():
        url = line.strip()
        if not url:
            # A blank line would otherwise become a bogus ('P', '') thread.
            continue
        blog = next((code for host, code in blog_codes if host in url), 'P')
        blog_url_pairs.append((blog, url))
    return blog_url_pairs

def parse_datetime(datetime_str):
    """Parse a comment timestamp written in one of the known blog formats.

    Parameters
    ----------
    datetime_str : str
        Timestamp text as it appears next to a comment.

    Returns
    -------
    datetime
        The parsed (naive) date and time.

    Raises
    ------
    ValueError
        If ``datetime_str`` matches none of the known formats.
    """
    known_formats = (
        # Gowers, Gil Kalai blog format: 'February 1, 2009 at 8:59 pm'
        '%B %d, %Y at %I:%M %p',
        # Tao blog format: '23 May, 2009 at 10:42 am'
        '%d %B, %Y at %I:%M %p',
        # Polymath blog format: 'July 12, 2012 @10:11 pm'
        '%B %d, %Y @%I:%M %p',
    )
    for fmt in known_formats:
        try:
            return datetime.strptime(datetime_str, fmt)
        except ValueError:
            # Not this layout; try the next one.
            continue
    raise ValueError(f'Unknown format: {datetime_str}')

In [None]:
###  POST SCRAPING  ###

def scrape_all_posts(url_dict):
    """
    Collect the main-post paragraphs of every project into one DataFrame.

    Parameters
    ----------
    url_dict : dict
        Maps a project ID to a list of (blog, url) tuples, the shape that
        parse_url_file returns.

    Returns
    -------
    DataFrame
        One row per paragraph with columns 'project-id', 'post-id' (blog
        code + numeric post ID), 'post-href', 'paragraph-idx',
        'paragraph-content' and 'id' (project ID, post ID and paragraph
        index joined with '-').
    """
    # Column-wise accumulator; each list gets one entry per paragraph
    post_dict = {
        'project-id' : [],
        'post-id' : [],
        'post-href' : [],
        'paragraph-idx' : [],
        'paragraph-content' : []
    }

    for project, urls in url_dict.items():
        for (blog, url) in urls:
            # NOTE(review): this expects scrape_page to return
            # (paragraphs, soup), but the scrape_page defined in this
            # notebook returns (DataFrame, next_url) or None. Presumably an
            # older or separate page scraper was intended -- confirm before
            # running this function.
            paragraphs, soup = scrape_page(blog, url)
            # First tag in the document whose id starts with 'post'
            post_id_tag = soup.find(
                (lambda tag : tag.has_attr('id') and tag['id'].startswith('post'))
            )
            # Second '-'-separated piece of the id, e.g. 'post-300' -> '300'
            post_id = post_id_tag['id'].split('-')[1]
            for i, p in enumerate(paragraphs):
                post_dict['project-id'].append(project)
                post_dict['post-id'].append(blog + post_id)
                post_dict['post-href'].append(url)
                post_dict['paragraph-idx'].append(str(i))
                post_dict['paragraph-content'].append(p)
        print(f'PROJECT {project} COMPLETE. \n')
    df = pd.DataFrame(post_dict)
    df['id'] = df['project-id'] + '-' + df['post-id'] + '-' + df['paragraph-idx']
    return df

In [None]:
###  COMMENT SCRAPING  ###

def get_child_ids(blog, comment):
    """
    Return the IDs of the comments inside ``comment``'s reply container.

    The reply container is the element with class 'children'. Every element
    inside it whose id starts with 'comment-' is reported, so nested replies
    (replies to replies) are included as well as direct ones.

    Parameters
    ----------
    blog : String
        Blog code; 'T' (Tao) keeps replies in a sibling element, all other
        blogs nest them inside the comment itself
    comment : Tag
        The comment element to inspect

    Returns
    -------
    list of String, or None
        Numeric ID strings (the part after the last '-') in document order;
        None when the comment has no reply container
    """
    if blog == 'T':
        # Replies are in a 'sibling' environment
        reply_env = comment.find_next_sibling(class_='children')
    else:
        # Replies are nested in the parent environment
        reply_env = comment.find(class_='children')

    if reply_env is None:
        # No reply container at all (as opposed to an empty one, which
        # yields an empty list below).
        return None

    reply_tags = reply_env.find_all(
        id=lambda x : x and x.startswith('comment-')
    )
    return [reply['id'].split('-')[-1] for reply in reply_tags]

def _first_link_href(tag):
    """Return the href of the first <a> inside ``tag``, or None if there is none."""
    link = tag.find('a')
    return link['href'] if link is not None else None


def scrape_comments(blog, base_url, comments):
    """
    Helper function for scraping the comments of a single thread (blog post).
    Returns a DataFrame with one row per comment and the following data:
        - author
        - author-href (None when the author name is not a link)
        - time (parsed with parse_datetime)
        - comment-href
        - comment-id
        - content
        - child-ids (see get_child_ids)

    Parameters
    ----------
    blog : String
        Character representation of source blog; Gowers (G), Tao (T),
        Gil Kalai (K), Dustin Mixon (D), sbseminar (M). Any other value is
        parsed with the Polymath projects blog (P) layout.
    base_url : String
        URL of the page being scraped; used to turn the relative
        '#comment-...' links of the Gowers and Polymath layouts into full
        URLs
    comments : ResultSet
        Comment tags; each must carry an id ending in '-<comment id>'
    """
    comment_dict = {
        'author' : [],
        'author-href' : [],
        'time' : [],
        'comment-href' : [],
        'comment-id' : [],
        'content' : [],
        'child-ids' : []
    }

    for comment in comments:
        comment_id = comment['id'].split('-')[-1]

        if blog == 'G':
            # Gowers' blog
            cite_tag = comment.find('cite')
            author = cite_tag.get_text(strip=True)
            author_href = _first_link_href(cite_tag)
            metadata_tag = comment.find('small')
            # The timestamp is the text of the link pointing at this comment
            datetime_tag = metadata_tag.find('a', href='#comment-' + comment_id)
            datetime_str = datetime_tag.get_text(strip=True)
            comment_href = urljoin(base_url, metadata_tag.find('a')['href'])
            content_tag = comment.find_all('p')
        elif blog == 'T':
            # Tao's blog
            author_tag = comment.find(class_='comment-author').strong
            author = author_tag.get_text(strip=True)
            author_href = _first_link_href(author_tag)
            datetime_tag = comment.find(class_='comment-permalink')
            datetime_str = datetime_tag.get_text(strip=True)
            comment_href = datetime_tag.find('a')['href']
            content_tag = comment.find(class_='comment-content')
        elif blog == 'K':
            # Gil Kalai's blog
            author_tag = comment.find(class_=lambda x : x and x.startswith('comment-author'))
            cite_tag = author_tag.find('cite')
            author = cite_tag.get_text(strip=True)
            author_href = _first_link_href(cite_tag)
            metadata_tag = comment.find(class_=lambda x : x and x.startswith('comment-meta'))
            datetime_str = metadata_tag.get_text(strip=True)
            comment_href = metadata_tag.find('a')['href']
            content_tag = comment.find(class_='comment-body')
        elif blog == 'D' or blog == 'M':
            # Dustin Mixon's blog (D) and sbseminar (M) share a layout; only
            # the tag wrapping the author name differs
            comment_tag = comment.find(class_='comment-body').find(class_='comment-meta')
            author_tag = comment_tag.find(
                class_=lambda x : x and x.startswith('comment-author')
            )
            if blog == 'D':
                author_tag = author_tag.find('b', class_='fn')
            else:
                author_tag = author_tag.find('cite', class_='fn')
            author = author_tag.get_text(strip=True)
            author_href = _first_link_href(author_tag)
            metadata_tag = comment_tag.find(class_='comment-metadata')
            datetime_str = metadata_tag.find('time').get_text(strip=True)
            comment_href = metadata_tag.find('a')['href']
            content_tag = comment.find(class_='comment-content')
        else:
            # Polymath projects blog
            metadata_tag = comment.find(
                id=lambda x : x and x.startswith('pd_rating_holder')
            ).find_next_sibling('p').find('cite')
            author_tag = metadata_tag.find('span', class_='fn')
            author = author_tag.get_text(strip=True)
            author_href = _first_link_href(author_tag)
            # The timestamp follows the last '— ' in the cite text
            datetime_str = metadata_tag.get_text(strip=True).split('— ')[-1]
            # Reset per comment so that a comment without a permalink gets
            # None instead of the previous comment's link (or an unbound
            # name on the first comment).
            comment_href = None
            for a in metadata_tag('a', href=True):
                if a['href'].startswith('#comment-'):
                    comment_href = urljoin(base_url, a['href'])
            content_tag = comment.find(
                class_=lambda x : x and x.startswith('comment-author')
            )

        # Get comment content
        content_list = []
        if blog == 'P':
            # Take the children up to (not including) the first <div>
            for child in content_tag:
                if child.name == 'div':
                    break
                content_list.append(child.get_text(separator=' ', strip=True))
        else:
            for p in content_tag:
                content_list.append(p.get_text(separator=' ', strip=True))
        content = ' '.join(content_list)
        content = ' '.join(content.split()) # remove extra whitespace

        reply_ids = get_child_ids(blog, comment)

        # Store data in dictionary
        comment_dict['author'].append(author)
        comment_dict['author-href'].append(author_href)
        comment_dict['time'].append(parse_datetime(datetime_str))
        comment_dict['comment-href'].append(comment_href)
        comment_dict['comment-id'].append(comment_id)
        comment_dict['content'].append(content)
        comment_dict['child-ids'].append(reply_ids)
    return pd.DataFrame(comment_dict)

def scrape_page(blog, url):
    """
    Download one page of a thread and scrape its comments.

    Parameters
    ----------
    blog : String
        Blog code of the page (G, T, K, D, M or P)
    url : String
        Address of the page to download

    Returns
    -------
    (DataFrame or None, String or None)
        The comments of the page (the scrape_comments columns plus 'blog'
        and 'post-id'), or None when the page has no comments; and the URL
        of the next comment page, or None when there is none. Pagination
        is only looked for on Tao's blog (T).
    """
    # A timeout keeps one unresponsive server from hanging the whole run.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Check for comment pagination
    next_url = None
    if blog == 'T':
        nav_tag = soup.find('div', class_='navigation')
        link_box = (
            nav_tag.find('div', class_='alignleft')
            if nav_tag is not None else None
        )
        next_tag = link_box.find('a') if link_box is not None else None
        if next_tag is not None:
            next_url = next_tag['href']

    # Replace LaTeX figures with alt text
    for tex_tag in soup.find_all('img', class_='latex'):
        alt = tex_tag.get('alt', '')
        tex_tag.replace_with('$' + alt + '$')

    # Get list of comments
    comment_tag = 'div' if blog == 'T' else 'li'
    comments = soup.find_all(
        comment_tag,
        class_=lambda x : x and x.startswith('comment'),
        id=lambda x : x and (x.startswith('comment-') or x.startswith('li-comment-'))
    )

    if not comments:
        # Keep the two-item shape that callers unpack, and keep the link
        # to the next page so a thread can continue past an empty page.
        return None, next_url

    df = scrape_comments(blog, url, comments)
    df['blog'] = blog

    # The element carrying the 'post-<id>' identifier differs per blog
    if blog == 'T':
        post_env = 'h1'
    elif blog == 'D' or blog == 'M':
        post_env = 'article'
    else:
        post_env = 'div'
    post_tag = soup.find(post_env, id=lambda x : x and x.startswith('post'))
    df['post-id'] = post_tag['id'].split('-')[-1]
    return df, next_url

def scrape_thread(blog, url):
    """
    Concatenate and format data for all pages associated with a single thread.

    Pages are fetched with scrape_page, following each returned next-page
    URL until there is none. A page result of None, or a page without a
    comment table, contributes no rows.

    Parameters
    ----------
    blog : String
        Blog code of the thread
    url : String
        Address of the first page of the thread

    Returns
    -------
    DataFrame
        The per-page comment tables stacked together, plus:
        - id: blog + post-id + '-' + comment-id for top-level comments,
          blog + 'r' + post-id + '-' + comment-id for replies
        - in-reply-to / in-reply-to-href: id and link of the comment that
          lists this comment among its child-ids (None for top-level
          comments)
        An empty DataFrame without columns when no page had comments.
    """
    frames = []
    while url:
        page = scrape_page(blog, url)
        if page is None:
            # Nothing scraped and no next-page link available: stop here.
            break
        df_comments, url = page
        if df_comments is not None:
            frames.append(df_comments)

    if not frames:
        # Without rows the ID columns below cannot be derived.
        return pd.DataFrame()
    df = pd.concat(frames, ignore_index=True)

    # Create custom ID
    df['id'] = df['blog'] + df['post-id'] + '-' + df['comment-id']

    # Get parent references. child-ids lists every comment nested under a
    # row, so a deeper reply is first claimed by its outer ancestor and then
    # overwritten when the loop reaches a closer one further down the table.
    df['in-reply-to'] = None
    df['in-reply-to-href'] = None
    parent_columns = ['in-reply-to', 'in-reply-to-href']
    for _, row in df.iterrows():
        child_ids = row['child-ids']
        if child_ids:
            for child in child_ids:
                df.loc[df['comment-id'] == child, parent_columns] = [row['id'], row['comment-href']]

    # Mark replies with an 'r' in their own ID.
    # NOTE(review): in-reply-to was filled with the parent's ID before this
    # rewrite, so a reply to a reply points at the parent's un-marked ID --
    # confirm that this is intended.
    is_reply = df['in-reply-to'].notnull()
    df.loc[is_reply, 'id'] = (
        df.loc[is_reply, 'blog'] + 'r' +
        df.loc[is_reply, 'post-id'] + '-' +
        df.loc[is_reply, 'comment-id']
    )

    return df

def scrape_project(thread_list):
    """
    Scrape every thread of a project and stack the results.

    Parameters
    ----------
    thread_list : iterable of (blog, url) tuples
        Threads to scrape, in order; each is passed to scrape_thread

    Returns
    -------
    DataFrame
        Rows of all threads in the given order, re-indexed from 0; an empty
        DataFrame when ``thread_list`` is empty
    """
    thread_frames = [scrape_thread(blog, url) for (blog, url) in thread_list]
    # The leading empty frame keeps the call valid for an empty thread list.
    return pd.concat([pd.DataFrame(), *thread_frames], ignore_index=True)

In [None]:
###  SAVING  ###

def configure_columns(project_id, df):
    """
    Tag a project's comment table with its project ID.

    ``df`` is modified in place: a 'project-id' column is added and the
    values of 'id' and 'in-reply-to' are prefixed with ``project_id`` and a
    hyphen. The frame is also appended to the global ``all_projects`` list
    (NOTE(review): that list is not defined in the visible part of this
    notebook; it must exist before this function is called).

    Parameters
    ----------
    project_id : String
        Project label, e.g. 'P1'
    df : DataFrame
        Comment table with 'id' and 'in-reply-to' columns

    Returns
    -------
    DataFrame
        The same ``df`` object that was passed in
    """
    prefix = project_id + '-'
    df['project-id'] = project_id
    for column in ('id', 'in-reply-to'):
        df[column] = prefix + df[column]
    all_projects.append(df)
    return df

def download_df(df, output_name, format='json', index=False):
    """
    Write ``df`` to a file and hand that file to ``files.download``.

    Parameters
    ----------
    df : DataFrame
    output_name : String
        Name of the file to write
    format : String
        'csv' or 'xlsx'; any other value writes JSON with one record per
        row
    index : bool
        Whether to write the row index (CSV and Excel only)
    """
    if format in ('csv', 'xlsx'):
        write = df.to_csv if format == 'csv' else df.to_excel
        write(output_name, index=index)
    else:
        df.to_json(output_name, orient='records')
    files.download(output_name)

# Scrape posts

In [None]:
# Scrape the main post of every project listed in urls.txt and download the
# resulting paragraph table as JSON.
url_dict = parse_url_file()
# scrape_all_posts is the post scraper defined above; no function named
# `scrape` is defined in this notebook.
df_posts = scrape_all_posts(url_dict)
download_df(df_posts, 'data-project-posts.json')

# Scrape comments

## Polymath1

In [None]:
# Polymath1 threads as (blog, url) pairs, in scrape order. The keys look like
# the comment-number range discussed in each thread (presumably the numbers
# that prefix the comment texts -- TODO confirm); only the values are used.
# NOTE(review): the labels '850-900' and '900-999' overlap at 900, while the
# URL of the former reads 851-899.
poly1_thread_dict = {
    '1-199' : ('G', 'https://gowers.wordpress.com/2009/02/01/a-combinatorial-approach-to-density-hales-jewett/'),
    '200-299' : ('T', 'https://terrytao.wordpress.com/2009/02/05/upper-and-lower-bounds-for-the-density-hales-jewett-problem/'),
    '300-399' : ('G', 'https://gowers.wordpress.com/2009/02/06/dhj-the-triangle-removal-approach/'),
    '400-499' : ('G', 'https://gowers.wordpress.com/2009/02/08/dhj-quasirandomness-and-obstructions-to-uniformity/'),
    '500-599' : ('G', 'https://gowers.wordpress.com/2009/02/13/dhj-possible-proof-strategies/'),
    '600-699' : ('T', 'https://terrytao.wordpress.com/2009/02/11/a-reading-seminar-on-density-hales-jewett/'),
    '700-799' : ('T', 'https://terrytao.wordpress.com/2009/02/13/bounds-for-the-first-few-density-hales-jewett-numbers-and-related-quantities/'),
    '800-849' : ('G', 'http://gowers.wordpress.com/2009/02/23/brief-review-of-polymath1/'),
    '850-900' : ('G', 'https://gowers.wordpress.com/2009/03/02/dhj3-851-899/'),
    '900-999' : ('T', 'https://terrytao.wordpress.com/2009/03/04/dhj3-900-999-density-hales-jewett-type-numbers/'),
    '1000-1049' : ('G', 'https://gowers.wordpress.com/2009/03/10/problem-solved-probably/'),
    '1050-1099' : ('G', 'http://gowers.wordpress.com/2009/03/16/dhj3-and-related-results-1050-1099/'),
    '1100-1199' : ('T', 'https://terrytao.wordpress.com/2009/03/14/dhj3-1100-1199-density-hales-jewett-type-numbers/'),
    '1200-1299' : ('T', 'http://terrytao.wordpress.com/2009/03/30/dhjk-1200-1299-density-hales-jewett-type-numbers/')
}
# A dict view is enough here: scrape_project only iterates over it once.
poly1_thread_list = poly1_thread_dict.values()

In [None]:
df_poly1 = scrape_project(poly1_thread_list)
df_poly1 = configure_columns('P1', df_poly1)
df_poly1

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,gowers,https://gowers.wordpress.com,2009-02-01 20:59:00,https://gowers.wordpress.com/2009/02/01/a-comb...,1646,1. A quick question. Furstenberg and Katznelso...,,G,300,P1-G300-1646,,,P1
1,jozsef,,2009-02-01 21:08:00,https://gowers.wordpress.com/2009/02/01/a-comb...,1648,2. In this note I will try to argue that we sh...,,G,300,P1-G300-1648,,,P1
2,Jason Dyer,http://numberwarrior.wordpress.com,2009-02-01 21:23:00,https://gowers.wordpress.com/2009/02/01/a-comb...,1649,3. I find it reassuring the first thing I thou...,,G,300,P1-G300-1649,,,P1
3,Terence Tao,http://www.math.ucla.edu/~tao,2009-02-01 21:26:00,https://gowers.wordpress.com/2009/02/01/a-comb...,1650,4. As Gil pointed out in his post on this proj...,,G,300,P1-G300-1650,,,P1
4,Terence Tao,http://www.math.ucla.edu/~tao,2009-02-01 21:30:00,https://gowers.wordpress.com/2009/02/01/a-comb...,1651,"5. Incidentally, I only learned in the process...",,G,300,P1-G300-1651,,,P1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1503,Gil Kalai,http://gilkalai.wordpress.com/,2009-04-04 23:28:00,https://terrytao.wordpress.com/2009/03/30/dhjk...,37905,1240. re: David’s 1218 There are some studies ...,,T,1984,P1-T1984-37905,,,P1
1504,Kristal Cantwell,,2009-04-05 12:15:00,https://terrytao.wordpress.com/2009/03/30/dhjk...,37922,1241. 4D Moser If a Moser set has 4 points wit...,,T,1984,P1-T1984-37922,,,P1
1505,D. Eppstein,http://11011110.livejournal.com/,2009-04-05 13:57:00,https://terrytao.wordpress.com/2009/03/30/dhjk...,37925,"1242. Fujimura disjoint triangle cover Ok, let...",,T,1984,P1-T1984-37925,,,P1
1506,D. Eppstein,http://11011110.livejournal.com/,2009-04-05 16:32:00,https://terrytao.wordpress.com/2009/03/30/dhjk...,37930,1243. Fujimura disjoint triangle cover I forgo...,,T,1984,P1-T1984-37930,,,P1


In [None]:
# Check: replies on Gowers' blog (look up a comment by ID, then the comments
# that name it as their parent)
# df_poly1[df_poly1['id'] == 'P1-G499-2925']
# df_poly1[df_poly1['in-reply-to'] == 'P1-G499-2925']

In [None]:
# Check: LaTeX parsing on Gowers' blog (formula images should appear as
# $...$ text in the comment content)
df_poly1[df_poly1['comment-id'] == '1658']['content'].iloc[0]

'7. With reference to Jozsef’s comment, if we suppose that the $d$ numbers used to generate the set are indeed independent, then it’s natural to label a typical point of the Cartesian product as $(\\epsilon,\\eta)$ , where each of $\\epsilon$ and $\\eta$ is a $01$ -sequence of length $d$ . Then a corner is a triple of the form $(\\epsilon,\\eta)$ , $(\\epsilon,\\eta+\\delta)$ , $(\\epsilon+\\delta,\\eta)$ , where $\\delta$ is a $\\{-1,0,1\\}$ -valued sequence of length $d$ with the property that both $\\epsilon+\\delta$ and $\\eta+\\delta$ are $01$ -sequences. So the question is whether corners exist in every dense subset of the original Cartesian product. This is simpler than the density Hales-Jewett problem in at least one respect: it involves $01$ -sequences rather than $012$ -sequences. But that simplicity may be slightly misleading because we are looking for corners in the Cartesian product. A possible disadvantage is that in this formulation we lose the symmetry of the corners: t

In [None]:
# Check: Tao LaTeX parsing
df_poly1[df_poly1['comment-id'] == '35662']['content'].iloc[0]

'241. Upper bound for $c_5 < 156$ This proof is along the lines of Sune.90 Sune shows there is just one way to place 18 points in a cube, and that at most 52 points fit in $3^4$ Suppose 156 points may be chosen in $3^5$ . There must be 52 points in each slice of $3^4$ Divide $3^5$ into nine cubes. There are 17 or 18 points in each cube, and one cube in each row and column has 18 points. For example 17, 17, 18 18, 17, 17 17, 18, 17 Cut the cubes further, into squares. The 18-point cubes must cut into three six-point slices x y and z, but the 17-point squares have more variation pqr, stu, xyz xyz, abc, def ghk, xyz, lmn Take the three cubes in the top row, and slice them along a different axis so they are psx, qty and ruz. One of these cubes has 18 points, and so is xyz; so r=x and u=y. Similar logic in second column gives u=x and c=y. So there is a contradiction. There are four other ways to place the 17-point and 18-point cubes, but they all lead to contradictions. So 156 points can’t 

In [None]:
# TO DO: row without content
# row = df_poly1.loc[df_poly1['comment-id'] == '2398']

In [None]:
download_df(df_poly1, 'polymath1.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath2

In [None]:
df_poly2 = scrape_project([('G', 'https://gowers.wordpress.com/2009/02/17/must-an-explicitly-defined-banach-space-contain-c_0-or-ell_p/')])
df_poly2 = configure_columns('P2', df_poly2)
df_poly2

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,gowers,https://gowers.wordpress.com,2009-02-18 09:53:00,https://gowers.wordpress.com/2009/02/17/must-a...,2252,1. A long time ago when I was thinking about t...,,G,449,P2-G449-2252,,,P2
1,Alec Edgington,,2009-02-18 16:12:00,https://gowers.wordpress.com/2009/02/17/must-a...,2256,Hello Tim. I too am looking forward to seeing ...,,G,449,P2-G449-2256,,,P2
2,gowers,https://gowers.wordpress.com,2009-02-18 19:42:00,https://gowers.wordpress.com/2009/02/17/must-a...,2258,"Alec, Tim himself asked a similar question in ...",,G,449,P2-G449-2258,,,P2
3,toomuchcoffeeman,http://ifwisdomwereteachable.wordpress.com,2009-02-20 02:42:00,https://gowers.wordpress.com/2009/02/17/must-a...,2285,"Interesting discussion, though not one where I...",,G,449,P2-G449-2285,,,P2
4,Timothy Chow,http://alum.mit.edu/www/tchow,2009-02-20 04:01:00,https://gowers.wordpress.com/2009/02/17/must-a...,2287,Here’s one way to probe “how much induction” i...,,G,449,P2-G449-2287,,,P2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,Mr NuttELAVer7Ya,http://mrnuttelaver7ya.wordpress.com,2024-05-18 13:59:00,https://gowers.wordpress.com/2009/02/17/must-a...,537496,Fix him for her nvisaversa,,G,449,P2-G449-537496,,,P2
63,Mr NuttELAVer7Ya,http://mrnuttelaver7ya.wordpress.com,2024-05-18 14:15:00,https://gowers.wordpress.com/2009/02/17/must-a...,537497,,,G,449,P2-G449-537497,,,P2
64,Mr NuttELAVer7Ya,http://mrnuttelaver7ya.wordpress.com,2024-05-18 14:15:00,https://gowers.wordpress.com/2009/02/17/must-a...,537498,Utwinprimeconjecture solved—-,,G,449,P2-G449-537498,,,P2
65,Mr NuttELAVer7Ya,http://mrnuttelaver7ya.wordpress.com,2024-05-18 14:25:00,https://gowers.wordpress.com/2009/02/17/must-a...,537499,Obtain resolution through compromise in times ...,,G,449,P2-G449-537499,,,P2


In [None]:
download_df(df_poly2, 'polymath2.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath3

In [None]:
poly3_urls = """
https://gilkalai.wordpress.com/2010/09/29/polymath-3-polynomial-hirsch-conjecture/
https://gilkalai.wordpress.com/2010/10/03/polymath-3-the-polynomial-hirsch-conjecture-2/
https://gilkalai.wordpress.com/2010/10/10/polymath3-polynomial-hirsch-conjecture-3/
https://gilkalai.wordpress.com/2010/10/21/polymath3-polynomial-hirsch-conjecture-4/
https://gilkalai.wordpress.com/2010/11/28/polynomial-hirsch-conjecture-5-abstractions-and-counterexamples/
https://gilkalai.wordpress.com/2011/04/13/polymath3-phc6-the-polynomial-hirsch-conjecture-a-topological-approach/
https://gilkalai.wordpress.com/2009/08/09/the-polynomial-hirsch-conjecture-discussion-thread/
https://gilkalai.wordpress.com/2009/10/06/the-polynomial-hirsch-conjecture-discussion-thread-continued/
"""
poly3_thread_list = parse_urls(poly3_urls)
poly3_thread_list

[('K',
  'https://gilkalai.wordpress.com/2010/09/29/polymath-3-polynomial-hirsch-conjecture/'),
 ('K',
  'https://gilkalai.wordpress.com/2010/10/03/polymath-3-the-polynomial-hirsch-conjecture-2/'),
 ('K',
  'https://gilkalai.wordpress.com/2010/10/10/polymath3-polynomial-hirsch-conjecture-3/'),
 ('K',
  'https://gilkalai.wordpress.com/2010/10/21/polymath3-polynomial-hirsch-conjecture-4/'),
 ('K',
  'https://gilkalai.wordpress.com/2010/11/28/polynomial-hirsch-conjecture-5-abstractions-and-counterexamples/'),
 ('K',
  'https://gilkalai.wordpress.com/2011/04/13/polymath3-phc6-the-polynomial-hirsch-conjecture-a-topological-approach/'),
 ('K',
  'https://gilkalai.wordpress.com/2009/08/09/the-polynomial-hirsch-conjecture-discussion-thread/'),
 ('K',
  'https://gilkalai.wordpress.com/2009/10/06/the-polynomial-hirsch-conjecture-discussion-thread-continued/')]

In [None]:
df_poly3 = scrape_project(poly3_thread_list)
df_poly3 = configure_columns('P3', df_poly3)
df_poly3

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Nicolai Hähnle,,2010-09-30 12:52:00,https://gilkalai.wordpress.com/2010/09/29/poly...,3397,"Dear Gil, I’ve only recently thought about thi...",,K,5439,P3-K5439-3397,,,P3
1,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2010-09-30 17:25:00,https://gilkalai.wordpress.com/2010/09/29/poly...,3398,"Dear Nicolai, This is very interesting! Pleas ...",[3428],K,5439,P3-K5439-3398,,,P3
2,Nicolai Hähnle,,2010-10-01 13:18:00,https://gilkalai.wordpress.com/2010/09/29/poly...,3428,Here’s a construction giving d(n-1)+1 that par...,,K,5439,P3-Kr5439-3428,P3-K5439-3398,https://gilkalai.wordpress.com/2010/09/29/poly...,P3
3,Terence Tao,http://www.math.ucla.edu/~tao,2010-09-30 19:32:00,https://gilkalai.wordpress.com/2010/09/29/poly...,3399,I’ve started a wiki page for this project at h...,,K,5439,P3-K5439-3399,,,P3
4,Terence Tao,http://www.math.ucla.edu/~tao,2010-09-30 20:10:00,https://gilkalai.wordpress.com/2010/09/29/poly...,3401,One place to get started is to try to work out...,,K,5439,P3-K5439-3401,,,P3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
592,Oinky,,2009-10-15 21:47:00,https://gilkalai.wordpress.com/2009/10/06/the-...,1996,Just some musings I thought I’d share…. It occ...,,K,4620,P3-K4620-1996,,,P3
593,Anand Kulkarni,http://www.ocf.berkeley.edu/~anandk,2009-11-04 03:36:00,https://gilkalai.wordpress.com/2009/10/06/the-...,2048,"I apologize for my naivety, but I must still a...",,K,4620,P3-K4620-2048,,,P3
594,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2009-11-06 01:07:00,https://gilkalai.wordpress.com/2009/10/06/the-...,2056,"Dear Oinky, (Belated) thanks for your comment....",,K,4620,P3-K4620-2056,,,P3
595,realsamurai,http://smartfxsol.com,2010-08-01 12:50:00,https://gilkalai.wordpress.com/2009/10/06/the-...,3320,"Forex abner grenselose muligheder, giver for a...",,K,4620,P3-K4620-3320,,,P3


In [None]:
# Check: Kalai nested replies
# df_poly3[df_poly3['in-reply-to'] == 'K5439-3430']

In [None]:
# Check: Kalai LaTeX parsing
df_poly3[df_poly3['comment-id'] == '3403']['content'].iloc[0]

"More generally, one might like to play with the restricted function f'(n), defined as with f(n) except that each of the F_i are forced to be singleton families (i.e. they consist of just one set F_i = A_i, with the $A_1,\\ldots,A_t$ distinct). The condition (*) then becomes that $A_i \\cap A_k \\subset A_j$ whenever $i < j < k$ . It should be possible to compute f'(n) quite precisely. Unfortunately this does not upper bound f(n) since $f'(n) \\leq f(n)$ , but it may offer some intuition."

In [None]:
download_df(df_poly3, 'polymath3.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath4

In [None]:
# Each thread is listed once: a repeated URL is scraped again by
# scrape_project and yields duplicate rows (and duplicate ids) in df_poly4.
# NOTE(review): the proposal thread and the draft-paper thread were each
# listed twice before. The list has research threads iii-v but no
# "research-thread-ii" URL -- possibly the second proposal entry was meant to
# be that thread; confirm and add its URL if so.
poly4_urls = """
https://polymathprojects.org/2009/07/27/proposal-deterministic-way-to-find-primes/
https://polymathprojects.org/2009/08/13/research-thread-iii-determinstic-way-to-find-primes/
https://polymathprojects.org/2009/08/28/research-thread-iv-determinstic-way-to-find-primes/
https://polymathprojects.org/2009/10/27/research-thread-v-determinstic-way-to-find-primes/
https://polymathprojects.org/2010/06/29/draft-version-of-polymath4-paper/
https://polymathprojects.org/2009/07/28/deterministic-way-to-find-primes-discussion-thread/
"""
poly4_thread_list = parse_urls(poly4_urls)
poly4_thread_list

[('P',
  'https://polymathprojects.org/2009/07/27/proposal-deterministic-way-to-find-primes/'),
 ('P',
  'https://polymathprojects.org/2009/07/27/proposal-deterministic-way-to-find-primes/'),
 ('P',
  'https://polymathprojects.org/2009/08/13/research-thread-iii-determinstic-way-to-find-primes/'),
 ('P',
  'https://polymathprojects.org/2009/08/28/research-thread-iv-determinstic-way-to-find-primes/'),
 ('P',
  'https://polymathprojects.org/2009/10/27/research-thread-v-determinstic-way-to-find-primes/'),
 ('P',
  'https://polymathprojects.org/2010/06/29/draft-version-of-polymath4-paper/'),
 ('P',
  'https://polymathprojects.org/2009/07/28/deterministic-way-to-find-primes-discussion-thread/'),
 ('P',
  'https://polymathprojects.org/2010/06/29/draft-version-of-polymath4-paper/')]

In [None]:
df_poly4 = scrape_project(poly4_thread_list)
df_poly4 = configure_columns('P4', df_poly4)
df_poly4

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,gowers,http://gowers.wordpress.com,2009-07-27 13:47:00,https://polymathprojects.org/2009/07/27/propos...,7,"This is certainly an interesting problem, from...",[8],P,26,P4-P26-7,,,P4
1,Terence Tao,http://www.math.ucla.edu/~tao,2009-07-27 14:47:00,https://polymathprojects.org/2009/07/27/propos...,8,"Dear Tim, Actually I’ve only thought about thi...",,P,26,P4-Pr26-8,P4-P26-7,https://polymathprojects.org/2009/07/27/propos...,P4
2,gowers,http://gowers.wordpress.com,2009-07-27 15:52:00,https://polymathprojects.org/2009/07/27/propos...,11,"Amusingly, the problem occurred to me too when...","[14, 25, 38, 40, 42, 46]",P,26,P4-P26-11,,,P4
3,Terence Tao,http://www.math.ucla.edu/~tao,2009-07-27 17:04:00,https://polymathprojects.org/2009/07/27/propos...,14,I haven’t thought about the problem too deeply...,,P,26,P4-Pr26-14,P4-P26-11,https://polymathprojects.org/2009/07/27/propos...,P4
4,gowers,http://gowers.wordpress.com,2009-07-27 18:39:00,https://polymathprojects.org/2009/07/27/propos...,25,I’d be slightly (but possibly wrongly) surpris...,"[38, 40, 42, 46]",P,26,P4-Pr26-25,P4-P26-11,https://polymathprojects.org/2009/07/27/propos...,P4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
590,Terence Tao,http://www.math.ucla.edu/~tao,2010-09-22 03:21:00,https://polymathprojects.org/2010/06/29/draft-...,2396,The paper is now on the arXiv at http://arxiv....,,P,167,P4-Pr167-2396,P4-P167-2395,https://polymathprojects.org/2010/06/29/draft-...,P4
591,Girish Varma,http://girishvarma.wordpress.com,2010-11-18 16:39:00,https://polymathprojects.org/2010/06/29/draft-...,2431,I wanted to the answer to a slightly different...,,P,167,P4-P167-2431,,,P4
592,Anonymous,,2011-12-23 08:37:00,https://polymathprojects.org/2010/06/29/draft-...,4528,Hello. Is this wonderfully number theoretic th...,,P,167,P4-P167-4528,,,P4
593,Warren D Smith,http://rangevoting.org,2012-03-23 23:58:00,https://polymathprojects.org/2010/06/29/draft-...,5325,"Hello, I was reading http://arxiv.org/pdf/1009...",,P,167,P4-P167-5325,,,P4


In [None]:
download_df(df_poly4, 'polymath4.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath5

In [None]:
# Polymath5 (Erdős discrepancy problem) threads, all on gowers.wordpress.com,
# one URL per line. parse_urls() splits this string on newlines and pairs each
# URL with a one-letter blog code.
# NOTE(review): the list goes from edp21 straight to edp23 — no edp22 URL is
# present. Confirm whether that thread was left out on purpose.
poly5_urls = """
https://gowers.wordpress.com/2009/12/17/erdoss-discrepancy-problem/
https://gowers.wordpress.com/2010/01/06/erdss-discrepancy-problem-as-a-forthcoming-polymath-project/
https://gowers.wordpress.com/2010/01/09/erds-discrepancy-problem-continued/
https://gowers.wordpress.com/2010/01/11/the-erds-discrepancy-problem-iii/
https://gowers.wordpress.com/2010/01/14/the-erds-discrepancy-problem-iv/
https://gowers.wordpress.com/2010/01/16/the-erds-discrepancy-problem-v/
https://gowers.wordpress.com/2010/01/19/edp1-the-official-start-of-polymath5/
https://gowers.wordpress.com/2010/01/21/edp2-a-few-lessons-from-edp1/
https://gowers.wordpress.com/2010/01/26/edp3-a-very-brief-report-on-where-we-are/
https://gowers.wordpress.com/2010/01/30/edp4-focusing-on-multiplicative-functions/
https://gowers.wordpress.com/2010/02/02/edp5-another-very-brief-summary/
https://gowers.wordpress.com/2010/02/05/edp6-what-are-the-chances-of-success/
https://gowers.wordpress.com/2010/02/08/edp7-emergency-post/
https://gowers.wordpress.com/2010/02/19/edp8-what-next/
https://gowers.wordpress.com/2010/02/24/edp9-a-change-of-focus/
https://gowers.wordpress.com/2010/03/02/edp10-a-new-and-very-promising-approach/
https://gowers.wordpress.com/2010/03/07/edp11-the-search-continues/
https://gowers.wordpress.com/2010/03/13/edp12-representing-diagonal-maps/
https://gowers.wordpress.com/2010/03/23/edp13-quick-summary/
https://gowers.wordpress.com/2010/04/25/edp14-strategic-questions/
https://gowers.wordpress.com/2010/06/21/edp15-finding-a-diagonal-matrix/
https://gowers.wordpress.com/2010/07/04/edp16-from-ap-discrepancy-to-hap-discrepancy/
https://gowers.wordpress.com/2010/07/18/edp17-are-we-nearly-there/
https://gowers.wordpress.com/2010/09/03/edp18-apparently-p-does-not-equal-np/
https://gowers.wordpress.com/2010/09/06/edp19-removing-some-vagueness/
https://gowers.wordpress.com/2010/09/10/edp20-squares-and-fly-traps/
https://gowers.wordpress.com/2010/09/21/edp21-restrictions-on-possible-proofs/
https://gowers.wordpress.com/2012/08/27/edp23-second-guest-post-by-gil-kalai/
https://gowers.wordpress.com/2012/08/31/edp24-an-attempt-to-get-back-into-the-diagonal-decomposition-approach/
https://gowers.wordpress.com/2012/09/04/edp25-third-guest-post-by-gil-kalai/
https://gowers.wordpress.com/2012/09/06/edp26-three-generalizations/
https://gowers.wordpress.com/2012/09/19/edp27-the-modular-version-of-roths-ap-discrepancy-theorem/
"""
# Convert the raw text into (blog code, url) pairs.
poly5_thread_list = parse_urls(poly5_urls)
# Bare expression as the last line so the notebook renders the parsed pairs.
poly5_thread_list

[('G', 'https://gowers.wordpress.com/2009/12/17/erdoss-discrepancy-problem/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/06/erdss-discrepancy-problem-as-a-forthcoming-polymath-project/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/09/erds-discrepancy-problem-continued/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/11/the-erds-discrepancy-problem-iii/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/14/the-erds-discrepancy-problem-iv/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/16/the-erds-discrepancy-problem-v/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/19/edp1-the-official-start-of-polymath5/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/21/edp2-a-few-lessons-from-edp1/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/26/edp3-a-very-brief-report-on-where-we-are/'),
 ('G',
  'https://gowers.wordpress.com/2010/01/30/edp4-focusing-on-multiplicative-functions/'),
 ('G',
  'https://gowers.wordpress.com/2010/02/02/edp5-another-very-brief-summary/'),
 ('G',
  

In [None]:
# Scrape every Polymath5 thread and pass the result through configure_columns
# under the project label 'P5' (both helpers are defined elsewhere in this
# notebook). The trailing bare name makes the notebook render the frame.
df_poly5 = configure_columns('P5', scrape_project(poly5_thread_list))
df_poly5

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Greg Martin,,2009-12-17 14:17:00,https://gowers.wordpress.com/2009/12/17/erdoss...,4497,A Hungarian-umlaut “o” can be generated from U...,[4529],G,1368,P5-G1368-4497,,,P5
1,gowers,https://gowers.wordpress.com,2009-12-19 13:46:00,https://gowers.wordpress.com/2009/12/17/erdoss...,4529,Many thanks — got it sorted out now.,,G,1368,P5-Gr1368-4529,P5-G1368-4497,https://gowers.wordpress.com/2009/12/17/erdoss...,P5
2,ioannis parissis,http://www.math.ist.utl.pt/~parissis,2009-12-17 14:37:00,https://gowers.wordpress.com/2009/12/17/erdoss...,4498,This is a very naive question. But: In the ori...,[4503],G,1368,P5-G1368-4498,,,P5
3,gowers,https://gowers.wordpress.com,2009-12-17 18:28:00,https://gowers.wordpress.com/2009/12/17/erdoss...,4503,The difference is that the APs are always of t...,,G,1368,P5-Gr1368-4503,P5-G1368-4498,https://gowers.wordpress.com/2009/12/17/erdoss...,P5
4,Jason Dyer,http://numberwarrior.wordpress.com,2009-12-17 16:25:00,https://gowers.wordpress.com/2009/12/17/erdoss...,4499,"Constructively, wouldn’t it work to take the C...","[4500, 4501, 4502, 4504]",G,1368,P5-G1368-4499,,,P5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2629,A.Czuron,,2012-12-06 15:47:00,https://gowers.wordpress.com/2012/09/19/edp27-...,31689,Does HAP of length 1 are satisfactory? For exa...,,G,4528,P5-G4528-31689,,,P5
2630,Vedic Mathematics,http://www.mastermindvedicmaths.com,2012-12-25 06:28:00,https://gowers.wordpress.com/2012/09/19/edp27-...,31877,Nice solutions is given by author.,,G,4528,P5-G4528-31877,,,P5
2631,Thomas,,2013-08-20 08:12:00,https://gowers.wordpress.com/2012/09/19/edp27-...,42086,What happened to the revival? It has been almo...,,G,4528,P5-G4528-42086,,,P5
2632,Terence Tao,http://www.math.ucla.edu/~tao,2014-02-11 16:35:00,https://gowers.wordpress.com/2012/09/19/edp27-...,49338,There’s a new paper on the arXiv using SAT sol...,[49415],G,4528,P5-G4528-49338,,,P5


In [None]:
# Export the Polymath5 frame under the file name 'polymath5.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly5, 'polymath5.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath6

In [None]:
# Polymath6 threads: one on polymathprojects.org and one on
# gowers.wordpress.com, one URL per line.
# NOTE(review): the second URL uses http:// and has no trailing slash, unlike
# the first — confirm the scraper handles both forms the same way.
poly6_urls = """
https://polymathprojects.org/2011/02/05/polymath6-improving-the-bounds-for-roths-theorem/
http://gowers.wordpress.com/2011/02/05/polymath6-a-is-to-b-as-c-is-to
"""

# Pair each URL with its blog code, scrape all threads, then run the result
# through configure_columns under the project label 'P6'.
poly6_thread_list = parse_urls(poly6_urls)
df_poly6 = configure_columns('P6', scrape_project(poly6_thread_list))
df_poly6

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,gowers,http://gowers.wordpress.com,2011-02-06 11:00:00,https://polymathprojects.org/2011/02/05/polyma...,2794,"I have a fairly obvious question, which I thin...",,P,193,P6-P193-2794,,,P6
1,gowers,http://gowers.wordpress.com,2011-02-06 14:39:00,https://polymathprojects.org/2011/02/05/polyma...,2795,"To be slightly more specific, perhaps instead ...",,P,193,P6-P193-2795,,,P6
2,Tom,,2011-02-06 15:04:00,https://polymathprojects.org/2011/02/05/polyma...,2796,One small question which occurs to me from the...,,P,193,P6-P193-2796,,,P6
3,gowers,http://gowers.wordpress.com,2011-02-06 15:18:00,https://polymathprojects.org/2011/02/05/polyma...,2797,A somewhat related remark is this. I think the...,,P,193,P6-P193-2797,,,P6
4,Ben Green,,2011-02-06 16:57:00,https://polymathprojects.org/2011/02/05/polyma...,2799,WIth regard to the comments on Bohr sets above...,,P,193,P6-P193-2799,,,P6
5,Ben Green,,2011-02-06 17:02:00,https://polymathprojects.org/2011/02/05/polyma...,2800,In our offline email exchanges I mentioned tha...,,P,193,P6-P193-2800,,,P6
6,gowers,http://gowers.wordpress.com,2011-02-06 17:40:00,https://polymathprojects.org/2011/02/05/polyma...,2801,Responding to comment 7: I just want to clarif...,,P,193,P6-P193-2801,,,P6
7,Nets Katz,,2011-02-07 02:53:00,https://polymathprojects.org/2011/02/05/polyma...,2803,"Hi all, I am interested in the project of gett...","[2804, 2805]",P,193,P6-P193-2803,,,P6
8,Olof Sisask,,2011-02-07 06:06:00,https://polymathprojects.org/2011/02/05/polyma...,2804,"Hi Nets, I created some sections on the wiki f...",[2805],P,193,P6-Pr193-2804,P6-P193-2803,https://polymathprojects.org/2011/02/05/polyma...,P6
9,Nets Katz,,2011-02-07 06:13:00,https://polymathprojects.org/2011/02/05/polyma...,2805,"Olof, Thanks. I’m not very experienced with wi...",,P,193,P6-Pr193-2805,P6-P193-2804,https://polymathprojects.org/2011/02/05/polyma...,P6


In [None]:
# Export the Polymath6 frame under the file name 'polymath6.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly6, 'polymath6.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath7

In [None]:
# Polymath7 (hot spots conjecture) threads on polymathprojects.org, one URL
# per line.
# NOTE(review): the first six URLs use http:// and the last uses https://, and
# two have no trailing slash. The rendered comment-href values carry whichever
# scheme is written here — confirm whether the mix is intended.
poly7_urls = """
http://polymathprojects.org/2012/06/03/polymath-proposal-the-hot-spots-conjecture-for-acute-triangles
http://polymathprojects.org/2012/06/12/polymath7-research-thread-1-the-hot-spots-conjecture/
http://polymathprojects.org/2012/06/15/polymath7-research-threads-2-the-hot-spots-conjecture/
http://polymathprojects.org/2012/06/24/polymath7-research-threads-3-the-hot-spots-conjecture/
http://polymathprojects.org/2012/09/10/polymath7-research-threads-4-the-hot-spots-conjecture
http://polymathprojects.org/2013/08/09/polymath7-research-thread-5-the-hot-spots-conjecture/
https://polymathprojects.org/2012/06/09/polymath7-discussion-thread/
"""

# Pair each URL with its blog code, scrape all threads, then run the result
# through configure_columns under the project label 'P7'.
poly7_thread_list = parse_urls(poly7_urls)
df_poly7 = configure_columns('P7', scrape_project(poly7_thread_list))
df_poly7

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Terence Tao,http://www.math.ucla.edu/~tao,2012-06-03 03:09:00,http://polymathprojects.org/2012/06/03/polymat...,5855,Some initial questions to get the ball rolling...,"[5907, 5911, 6008, 6013, 6023]",P,266,P7-P266-5855,,,P7
1,Stuart Anderson,http://www.squaring.net,2012-06-05 08:42:00,http://polymathprojects.org/2012/06/03/polymat...,5907,Brian J. McCartin has recently published a boo...,"[5911, 6008, 6013, 6023]",P,266,P7-Pr266-5907,P7-P266-5855,http://polymathprojects.org/2012/06/03/polymat...,P7
2,meditationatae,http://meditationatae.wordpress.com,2012-06-05 12:06:00,http://polymathprojects.org/2012/06/03/polymat...,5911,In reply to Stuart Anderson: Brian J. McCartin...,"[6008, 6013, 6023]",P,266,P7-Pr266-5911,P7-P266-5907,http://polymathprojects.org/2012/06/03/polymat...,P7
3,Terence Tao,http://www.math.ucla.edu/~tao,2012-06-10 03:39:00,http://polymathprojects.org/2012/06/03/polymat...,6008,I worked out the second eigenspace for the equ...,"[6013, 6023]",P,266,P7-Pr266-6008,P7-P266-5911,http://polymathprojects.org/2012/06/03/polymat...,P7
4,meditationatae,http://meditationatae.wordpress.com,2012-06-10 07:54:00,http://polymathprojects.org/2012/06/03/polymat...,6013,I’m trying to understand how to verify the con...,[6023],P,266,P7-Pr266-6013,P7-P266-6008,http://polymathprojects.org/2012/06/03/polymat...,P7
...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,nilimanigam,http://gravatar.com/nilimanigam,2012-07-21 04:44:00,https://polymathprojects.org/2012/06/09/polyma...,8098,I’ve posted something twice on the research th...,,P,268,P7-P268-8098,,,P7
528,Terence Tao,http://www.math.ucla.edu/~tao,2012-08-09 03:07:00,https://polymathprojects.org/2012/06/09/polyma...,8877,Just a short note to say that I’m still intere...,[8882],P,268,P7-P268-8877,,,P7
529,nilimanigam,http://gravatar.com/nilimanigam,2012-08-09 04:30:00,https://polymathprojects.org/2012/06/09/polyma...,8882,Apologies about the delay from my end- I’ve be...,,P,268,P7-Pr268-8882,P7-P268-8877,https://polymathprojects.org/2012/06/09/polyma...,P7
530,Chris Evans,http://gravatar.com/letmeitellyou,2012-09-12 09:18:00,https://polymathprojects.org/2012/06/09/polyma...,9853,I just wanted to say that Bartlomiej and I are...,[9865],P,268,P7-P268-9853,,,P7


In [None]:
# Export the Polymath7 frame under the file name 'polymath7.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly7, 'polymath7.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath8

In [None]:
# Check (disabled): scrape only Scott Morrison's thread on
# sbseminar.wordpress.com (blog code 'M') with scrape_thread and inspect the
# resulting frame. Uncomment the two lines below to run it.
# df_morrison = scrape_thread('M', 'https://sbseminar.wordpress.com/2013/05/30/i-just-cant-resist-there-are-infinitely-many-pairs-of-primes-at-most-59470640-apart/')
# df_morrison

In [None]:
# Polymath8 (bounded gaps between primes) threads: terrytao.wordpress.com
# posts followed by three sbseminar.wordpress.com posts, one URL per line.
# parse_urls() splits this string on newlines and pairs each URL with a
# one-letter blog code.
# NOTE(review): the polymath8b entries go from "polymath8b-ix" straight to
# "polymath-8b-xi" — no "x" instalment is listed. Confirm whether that thread
# was left out on purpose.
poly8_urls = """
https://terrytao.wordpress.com/2013/06/03/the-prime-tuples-conjecture-sieve-theory-and-the-work-of-goldston-pintz-yildirim-motohashi-pintz-and-zhang/
https://terrytao.wordpress.com/2013/06/04/online-reading-seminar-for-zhangs-bounded-gaps-between-primes/
https://terrytao.wordpress.com/2013/06/08/the-elementary-selberg-sieve-and-bounded-prime-gaps/
https://terrytao.wordpress.com/2013/06/10/a-combinatorial-subset-sum-problem-associated-with-bounded-prime-gaps/
https://terrytao.wordpress.com/2013/06/11/further-analysis-of-the-truncated-gpy-sieve/
https://terrytao.wordpress.com/2013/06/12/estimation-of-the-type-i-and-type-ii-sums/
https://terrytao.wordpress.com/2013/06/14/estimation-of-the-type-iii-sums/
https://terrytao.wordpress.com/2013/06/18/a-truncated-elementary-selberg-sieve-of-pintz/
https://terrytao.wordpress.com/2013/06/23/the-distribution-of-primes-in-densely-divisible-moduli/
https://terrytao.wordpress.com/2013/06/30/bounded-gaps-between-primes-polymath8-a-progress-report/
https://terrytao.wordpress.com/2013/07/27/an-improved-type-i-estimate/
https://terrytao.wordpress.com/2013/08/17/polymath8-writing-the-paper/
https://terrytao.wordpress.com/2013/09/02/polymath8-writing-the-paper-ii/
https://terrytao.wordpress.com/2013/09/22/polymath8-writing-the-paper-iii/
https://terrytao.wordpress.com/2013/10/15/polymath8-writing-the-paper-iv/
https://terrytao.wordpress.com/2013/11/17/polymath8-writing-the-first-paper-v-and-a-look-ahead/
https://terrytao.wordpress.com/2013/11/19/polymath8b-bounded-intervals-with-many-primes-after-maynard/
https://terrytao.wordpress.com/2013/11/22/polymath8b-ii-optimising-the-variational-problem-and-the-sieve/
https://terrytao.wordpress.com/2013/12/08/polymath8b-iii-numerical-optimisation-of-the-variational-problem-and-a-search-for-new-sieves/
https://terrytao.wordpress.com/2013/12/20/polymath8b-iv-enlarging-the-sieve-support-more-efficient-numerics-and-explicit-asymptotics/
https://terrytao.wordpress.com/2014/01/08/polymath8b-v-stretching-the-sieve-support-further/
https://terrytao.wordpress.com/2014/01/17/polymath8b-vi-a-low-dimensional-variational-problem/
https://terrytao.wordpress.com/2014/01/28/polymath8b-vii-using-the-generalised-elliott-halberstam-hypothesis-to-enlarge-the-sieve-support-yet-further/
https://terrytao.wordpress.com/2014/02/09/polymath8b-viii-time-to-start-writing-up-the-results/
https://terrytao.wordpress.com/2014/02/21/polymath8b-ix-large-quadratic-programs/
https://terrytao.wordpress.com/2014/05/17/polymath-8b-xi-finishing-up-the-paper/
https://sbseminar.wordpress.com/2013/05/30/i-just-cant-resist-there-are-infinitely-many-pairs-of-primes-at-most-59470640-apart/
https://sbseminar.wordpress.com/2013/06/05/more-narrow-admissible-sets/
https://sbseminar.wordpress.com/2013/07/02/the-quest-for-narrow-admissible-tuples/
"""
# Convert the raw text into (blog code, url) pairs.
poly8_thread_list = parse_urls(poly8_urls)
# Bare expression as the last line so the notebook renders the parsed pairs.
poly8_thread_list

[('T',
  'https://terrytao.wordpress.com/2013/06/03/the-prime-tuples-conjecture-sieve-theory-and-the-work-of-goldston-pintz-yildirim-motohashi-pintz-and-zhang/'),
 ('T',
  'https://terrytao.wordpress.com/2013/06/04/online-reading-seminar-for-zhangs-bounded-gaps-between-primes/'),
 ('T',
  'https://terrytao.wordpress.com/2013/06/08/the-elementary-selberg-sieve-and-bounded-prime-gaps/'),
 ('T',
  'https://terrytao.wordpress.com/2013/06/10/a-combinatorial-subset-sum-problem-associated-with-bounded-prime-gaps/'),
 ('T',
  'https://terrytao.wordpress.com/2013/06/11/further-analysis-of-the-truncated-gpy-sieve/'),
 ('T',
  'https://terrytao.wordpress.com/2013/06/12/estimation-of-the-type-i-and-type-ii-sums/'),
 ('T',
  'https://terrytao.wordpress.com/2013/06/14/estimation-of-the-type-iii-sums/'),
 ('T',
  'https://terrytao.wordpress.com/2013/06/18/a-truncated-elementary-selberg-sieve-of-pintz/'),
 ('T',
  'https://terrytao.wordpress.com/2013/06/23/the-distribution-of-primes-in-densely-divisib

In [None]:
# Scrape every Polymath8 thread and pass the result through configure_columns
# under the project label 'P8' (both helpers are defined elsewhere in this
# notebook). The trailing bare name makes the notebook render the frame.
df_poly8 = configure_columns('P8', scrape_project(poly8_thread_list))
df_poly8

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Gergely Harcos,http://www.renyi.hu/~gharcos/,2013-09-12 16:21:00,https://terrytao.wordpress.com/2013/06/03/the-...,244903,"In the second display below (12), $\frac{n}{d}...","[247235, 247237]",T,6728,P8-T6728-244903,,,P8
1,Stijn Hanson,https://plus.google.com/116601422779638833535,2013-10-04 11:40:00,https://terrytao.wordpress.com/2013/06/03/the-...,247234,My apologies for all the help I’m requiring bu...,"[247235, 247237]",T,6728,P8-T6728-247234,,,P8
2,Terence Tao,http://www.math.ucla.edu/~tao,2013-10-04 12:14:00,https://terrytao.wordpress.com/2013/06/03/the-...,247235,"One cannot conclude $|F(w(x),x)| \leq \frac{2}...",[247237],T,6728,P8-Tr6728-247235,P8-T6728-247234,https://terrytao.wordpress.com/2013/06/03/the-...,P8
3,Stijn Hanson,https://plus.google.com/116601422779638833535,2013-10-04 12:43:00,https://terrytao.wordpress.com/2013/06/03/the-...,247237,I assumed it had something to do with that w r...,,T,6728,P8-Tr6728-247237,P8-T6728-247235,https://terrytao.wordpress.com/2013/06/03/the-...,P8
4,wanglaoxinr,http://none,2015-05-16 00:13:00,https://terrytao.wordpress.com/2013/06/03/the-...,455907,"for Cramér Conjecture ，change Pn to x, will th...",[461131],T,6728,P8-T6728-455907,,,P8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3499,Andrew Sutherland,http://math.mit.edu/~drew,2013-09-03 01:19:00,https://sbseminar.wordpress.com/2013/07/02/the...,24537,@Wouter: for k0=6329 and p_exh=17 I get the in...,,M,5146,P8-M5146-24537,,,P8
3500,Wouter Castryck,,2013-09-03 05:57:00,https://sbseminar.wordpress.com/2013/07/02/the...,24538,"Hi, my slowish implementation is still running...",,M,5146,P8-M5146-24538,,,P8
3501,Andrew Sutherland,http://math.mit.edu/~drew,2013-09-03 06:24:00,https://sbseminar.wordpress.com/2013/07/02/the...,24539,I think it probably makes sense to go with the...,,M,5146,P8-M5146-24539,,,P8
3502,Andrew Sutherland,http://math.mit.edu/~drew,2013-09-03 06:55:00,https://sbseminar.wordpress.com/2013/07/02/the...,24540,"Regarding #38, I realized my implementation wa...",,M,5146,P8-M5146-24540,,,P8


In [None]:
# Export the Polymath8 frame under the file name 'polymath8.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly8, 'polymath8.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath9

In [None]:
# Polymath9 threads on gowers.wordpress.com, one URL per line.
poly9_urls = """
https://gowers.wordpress.com/2013/10/24/what-i-did-in-my-summer-holidays/
https://gowers.wordpress.com/2013/11/03/dbd1-initial-post/
https://gowers.wordpress.com/2014/01/09/dbd2-success-of-a-kind/
"""

# Pair each URL with its blog code, scrape all threads, then run the result
# through configure_columns under the project label 'P9'.
poly9_thread_list = parse_urls(poly9_urls)
df_poly9 = configure_columns('P9', scrape_project(poly9_thread_list))
df_poly9

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,meditationatae,http://meditationatae.wordpress.com,2013-10-24 19:36:00,https://gowers.wordpress.com/2013/10/24/what-i...,43602,I have been really intrigued by P=?NP : “How c...,[43616],G,5141,P9-G5141-43602,,,P9
1,gowers,https://gowers.wordpress.com,2013-10-25 16:06:00,https://gowers.wordpress.com/2013/10/24/what-i...,43616,I have no objection to that.,,G,5141,P9-Gr5141-43616,P9-G5141-43602,https://gowers.wordpress.com/2013/10/24/what-i...,P9
2,E.L. Wisty,,2013-10-25 14:05:00,https://gowers.wordpress.com/2013/10/24/what-i...,43614,Reblogged this on Pink Iguana and commented: L...,,G,5141,P9-G5141-43614,,,P9
3,Richard Elwes,http://www.richardelwes.co.uk,2013-10-25 15:30:00,https://gowers.wordpress.com/2013/10/24/what-i...,43615,If this project results in an interesting theo...,,G,5141,P9-G5141-43615,,,P9
4,meditationatae,http://meditationatae.wordpress.com,2013-10-25 20:41:00,https://gowers.wordpress.com/2013/10/24/what-i...,43621,Reblogged this on meditationatae .,,G,5141,P9-G5141-43621,,,P9
...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,vznvzn,http://vzn1.wordpress.com,2014-01-11 16:53:00,https://gowers.wordpress.com/2014/01/09/dbd2-s...,45245,"bummer, yet still progress. it is better to ha...",,G,5256,P9-G5256-45245,,,P9
101,gowers,https://gowers.wordpress.com,2014-01-12 16:44:00,https://gowers.wordpress.com/2014/01/09/dbd2-s...,45293,Pavel has just emailed me with a further obser...,[53616],G,5256,P9-G5256-45293,,,P9
102,Jason Dyer,http://numberwarrior.wordpress.com,2014-02-27 19:12:00,https://gowers.wordpress.com/2014/01/09/dbd2-s...,53616,Did this move to email collaboration? As far m...,,G,5256,P9-Gr5256-53616,P9-G5256-45293,https://gowers.wordpress.com/2014/01/09/dbd2-s...,P9
103,Jason Dyer,http://numberwarrior.wordpress.com,2014-02-27 19:30:00,https://gowers.wordpress.com/2014/01/09/dbd2-s...,53621,If you open up to “either player can move on a...,[53842],G,5256,P9-G5256-53621,,,P9


In [None]:
# Export the Polymath9 frame under the file name 'polymath9.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly9, 'polymath9.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath10

In [None]:
# Polymath10 (Erdős–Rado delta-system conjecture) threads on
# gilkalai.wordpress.com, one URL per line.
poly10_urls = """
https://gilkalai.wordpress.com/2015/11/03/polymath10-the-erdos-rado-delta-system-conjecture/
https://gilkalai.wordpress.com/2015/11/11/polymath10-post-2-homological-approach/
https://gilkalai.wordpress.com/2015/12/08/polymath-10-post-3-how-are-we-doing/
https://gilkalai.wordpress.com/2016/01/31/polymath10-post-4-back-to-the-drawing-board/
"""

# Pair each URL with its blog code, scrape all threads, then run the result
# through configure_columns under the project label 'P10'.
poly10_thread_list = parse_urls(poly10_urls)
df_poly10 = configure_columns('P10', scrape_project(poly10_thread_list))
df_poly10

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,shacharlovett,,2015-11-03 05:42:00,https://gilkalai.wordpress.com/2015/11/03/poly...,22185,"Hi Gil, I cannot see some of the equations as ...",[22193],K,13306,P10-K13306-22185,,,P10
1,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2015-11-03 12:25:00,https://gilkalai.wordpress.com/2015/11/03/poly...,22193,"Dear Shachar, thanks! I fixed it (I hope)",,K,13306,P10-Kr13306-22193,P10-K13306-22185,https://gilkalai.wordpress.com/2015/11/03/poly...,P10
2,gowers,http://gowers.wordpress.com,2015-11-03 14:16:00,https://gilkalai.wordpress.com/2015/11/03/poly...,22197,"I hope to leave a more sensible comment soon, ...",,K,13306,P10-K13306-22197,,,P10
3,gowers,http://gowers.wordpress.com,2015-11-03 14:37:00,https://gilkalai.wordpress.com/2015/11/03/poly...,22199,I’ve just looked up what the middle part of a ...,[22202],K,13306,P10-K13306-22199,,,P10
4,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2015-11-03 15:41:00,https://gilkalai.wordpress.com/2015/11/03/poly...,22202,"Dear Tim, I think that Erdos-Ko-Rado theory is...",,K,13306,P10-Kr13306-22202,P10-K13306-22199,https://gilkalai.wordpress.com/2015/11/03/poly...,P10
...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2016-02-18 07:10:00,https://gilkalai.wordpress.com/2016/01/31/poly...,23878,Since we did not have comments for a while let...,"[23880, 23885, 23889]",K,13447,P10-K13447-23878,,,P10
357,domotorp,http://www.cs.elte.hu/~dom,2016-02-18 16:43:00,https://gilkalai.wordpress.com/2016/01/31/poly...,23880,Great to know. I would be very interested in s...,"[23885, 23889]",K,13447,P10-Kr13447-23880,P10-K13447-23878,https://gilkalai.wordpress.com/2016/01/31/poly...,P10
358,Ferdinand Ihringer,http://math.ihringer.org,2016-02-18 20:18:00,https://gilkalai.wordpress.com/2016/01/31/poly...,23885,I would be very interested as well. Some thing...,,K,13447,P10-Kr13447-23885,P10-K13447-23880,https://gilkalai.wordpress.com/2016/01/31/poly...,P10
359,Ferdinand Ihringer,http://math.ihringer.org,2016-02-20 17:19:00,https://gilkalai.wordpress.com/2016/01/31/poly...,23889,Now that I am going through my write-up yet an...,,K,13447,P10-Kr13447-23889,P10-K13447-23880,https://gilkalai.wordpress.com/2016/01/31/poly...,P10


In [None]:
# Export the Polymath10 frame under the file name 'polymath10.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly10, 'polymath10.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath11

In [None]:
# Polymath11 (Frankl's union-closed conjecture) threads on
# gowers.wordpress.com, one URL per line.
poly11_urls = """
https://gowers.wordpress.com/2016/01/21/frankls-union-closed-conjecture-a-possible-polymath-project/
https://gowers.wordpress.com/2016/01/29/func1-strengthenings-variants-potential-counterexamples/
https://gowers.wordpress.com/2016/02/08/func2-more-examples/
https://gowers.wordpress.com/2016/02/13/func3-further-strengthenings-and-variants/
https://gowers.wordpress.com/2016/02/22/func4-further-variants/
"""

# Pair each URL with its blog code, scrape all threads, then run the result
# through configure_columns under the project label 'P11'.
poly11_thread_list = parse_urls(poly11_urls)
df_poly11 = configure_columns('P11', scrape_project(poly11_thread_list))
df_poly11

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,David Bevan,http://dibevan.wordpress.com,2016-01-21 13:48:00,https://gowers.wordpress.com/2016/01/21/frankl...,153615,Perhaps a suitable initial step would be to st...,[153620],G,5979,P11-G5979-153615,,,P11
1,gowers,https://gowers.wordpress.com,2016-01-21 16:51:00,https://gowers.wordpress.com/2016/01/21/frankl...,153620,I agree that that seems very sensible. Accordi...,,G,5979,P11-Gr5979-153620,P11-G5979-153615,https://gowers.wordpress.com/2016/01/21/frankl...,P11
2,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2016-01-21 16:59:00,https://gowers.wordpress.com/2016/01/21/frankl...,153622,Cool! let’s see how it goes! I will try to tak...,,G,5979,P11-G5979-153622,,,P11
3,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2016-01-21 19:13:00,https://gowers.wordpress.com/2016/01/21/frankl...,153624,Let me also mention that a MathOverflow questi...,,G,5979,P11-G5979-153624,,,P11
4,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2016-01-21 19:17:00,https://gowers.wordpress.com/2016/01/21/frankl...,153625,"And for polymath connoisseurs, I posed a sort ...",,G,5979,P11-G5979-153625,,,P11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
675,jean-camille,,2018-02-13 04:59:00,https://gowers.wordpress.com/2016/02/22/func4-...,256259,I forget a fifth axiom : any member of the “Un...,,G,6118,P11-Gr6118-256259,P11-G6118-256242,https://gowers.wordpress.com/2016/02/22/func4-...,P11
676,jean-camille,,2018-02-13 16:15:00,https://gowers.wordpress.com/2016/02/22/func4-...,256361,No this fifth axiom is not good at all : I wan...,,G,6118,P11-Gr6118-256361,P11-G6118-256242,https://gowers.wordpress.com/2016/02/22/func4-...,P11
677,jean-camille,,2018-02-13 04:01:00,https://gowers.wordpress.com/2016/02/22/func4-...,256244,NOT : $t\in \bigcap T$ BUT $t\in \bigcup U$ ot...,,G,6118,P11-G6118-256244,,,P11
678,Roy Abrams,,2018-07-05 18:46:00,https://gowers.wordpress.com/2016/02/22/func4-...,321414,You can assign numbers to the sets in a union-...,[321417],G,6118,P11-G6118-321414,,,P11


In [None]:
# Export the Polymath11 frame under the file name 'polymath11.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly11, 'polymath11.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath12

In [None]:
# Polymath12 (Rota's basis conjecture) threads on polymathprojects.org, one
# URL per line.
poly12_urls = """
https://polymathprojects.org/2017/02/23/rotas-basis-conjecture-polymath-12/
https://polymathprojects.org/2017/03/06/rotas-basis-conjecture-polymath-12-2/
https://polymathprojects.org/2017/05/05/rotas-basis-conjecture-polymath-12-post-3/
"""

# Pair each URL with its blog code, scrape all threads, then run the result
# through configure_columns under the project label 'P12'.
poly12_thread_list = parse_urls(poly12_urls)
df_poly12 = configure_columns('P12', scrape_project(poly12_thread_list))
df_poly12

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2017-02-24 05:45:00,https://polymathprojects.org/2017/02/23/rotas-...,115048,"Congratulation, Tim, for launching polymath12....",,P,484,P12-P484-115048,,,P12
1,Thomas Sauvaget,,2017-02-24 08:22:00,https://polymathprojects.org/2017/02/23/rotas-...,115050,This looks interesting! In the spirit of polym...,"[115051, 115058]",P,484,P12-P484-115050,,,P12
2,Thomas Sauvaget,,2017-02-24 10:04:00,https://polymathprojects.org/2017/02/23/rotas-...,115051,The indices I’ve used should be different of c...,,P,484,P12-Pr484-115051,P12-P484-115050,https://polymathprojects.org/2017/02/23/rotas-...,P12
3,tchow8,,2017-02-24 16:02:00,https://polymathprojects.org/2017/02/23/rotas-...,115058,"Yes, this is correct.",,P,484,P12-Pr484-115058,P12-P484-115050,https://polymathprojects.org/2017/02/23/rotas-...,P12
4,rebeccastones82,,2017-02-24 12:54:00,https://polymathprojects.org/2017/02/23/rotas-...,115052,Thanks for organizing this. I’m coming at this...,"[115053, 115054, 115103, 115120, 115123, 11512...",P,484,P12-P484-115052,,,P12
...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,tchow8,,2017-10-23 00:21:00,https://polymathprojects.org/2017/05/05/rotas-...,117238,Although Polymath 12 seems to have gone dorman...,,P,951,P12-P951-117238,,,P12
213,Gil Kalai,http://www.ma.huji.ac.il/~kalai/,2018-10-19 08:49:00,https://polymathprojects.org/2017/05/05/rotas-...,125369,Very nice progress in the paper Halfway to Rot...,,P,951,P12-P951-125369,,,P12
214,Anonymous,,2024-02-04 18:20:00,https://polymathprojects.org/2017/05/05/rotas-...,196442,Is there exist a matroid such that all bases h...,,P,951,P12-P951-196442,,,P12
215,Anonymous,,2024-05-19 11:46:00,https://polymathprojects.org/2017/05/05/rotas-...,196524,,,P,951,P12-P951-196524,,,P12


In [None]:
# Spot check of LaTeX parsing on the Polymath blog: render the full 'content'
# string of the first row whose comment-id equals '115050'.
df_poly12.loc[df_poly12['comment-id'] == '115050', 'content'].iloc[0]

'This looks interesting! In the spirit of polymath projects where even tiny comments are allowed, and just to check my understanding, would you agree that the problem can be reformulated as : Show that for any $n\\in \\mathbb{N}*$ the following game has a winning strategy. a) Pick any $n$ bases $B_1:=( b_{1,1},\\cdots ,b_{1,n}),\\cdots ,$ $B_n:=( b_{n,1},\\cdots ,b_{n,n})$ b) Place them along the diagonal of an $n\\times n$ grid : $\\begin{matrix} B_1 & & & \\\\ & B_2 & &\\\\ & & \\ddots & \\\\ & & & B_n\\end{matrix}$ c) move sideways the elements of each $B_i$ to other cells so that only one element remains in each cell d) now for each column of the grid gather all its $n$ vectors : $\\begin{matrix} b_{1,i_1} & \\cdots & \\cdots & b_{1,i_n}\\\\ b_{2,i_1} & \\cdots & \\cdots & b_{2,i_n}\\\\ \\hdots & \\hdots & \\hdots & \\hdots \\\\ b_{n,i_1} & \\cdots & \\cdots & b_{n,i_n}\\\\ \\hline A_1:=(b_{1,i_1}, \\cdots ,b_{n,i_1}) & \\cdots & \\cdots & A_n:= (b_{1,i_n}, \\cdots ,b_{n,i_n}) \\en

In [None]:
# Export the Polymath12 frame under the file name 'polymath12.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly12, 'polymath12.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath13

In [None]:
# Polymath13 (intransitive dice) threads on gowers.wordpress.com, one URL per
# line.
poly13_urls = """
https://gowers.wordpress.com/2017/04/28/a-potential-new-polymath-project-intransitive-dice/
https://gowers.wordpress.com/2017/05/12/intransitive-dice-ii/
https://gowers.wordpress.com/2017/05/19/intransitive-dice-iii/
https://gowers.wordpress.com/2017/05/27/intransitive-dice-iv-first-problem-more-or-less-solved/
https://gowers.wordpress.com/2017/05/30/intransitive-dice-v-we-want-a-local-central-limit-theorem/
https://gowers.wordpress.com/2017/07/25/intransitive-dice-vi-sketch-proof-of-the-main-conjecture-for-the-balanced-sequences-model/
https://gowers.wordpress.com/2017/08/12/intransitive-dice-vii-aiming-for-further-results/
"""

# Pair each URL with its blog code, scrape all threads, then run the result
# through configure_columns under the project label 'P13'.
poly13_thread_list = parse_urls(poly13_urls)
df_poly13 = configure_columns('P13', scrape_project(poly13_thread_list))
df_poly13

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Brian Conrey,,2017-04-28 19:45:00,https://gowers.wordpress.com/2017/04/28/a-pote...,168834,"Thanks, Tim, for posting this. The only thing ...",,G,6224,P13-G6224-168834,,,P13
1,gowers,https://gowers.wordpress.com,2017-04-28 21:38:00,https://gowers.wordpress.com/2017/04/28/a-pote...,168836,Continuing the analogy with the angles-between...,,G,6224,P13-G6224-168836,,,P13
2,Kevin Costello,,2017-04-28 21:56:00,https://gowers.wordpress.com/2017/04/28/a-pote...,168837,For the question of “How likely is it that two...,"[168847, 168848]",G,6224,P13-G6224-168837,,,P13
3,gowers,https://gowers.wordpress.com,2017-04-29 09:56:00,https://gowers.wordpress.com/2017/04/28/a-pote...,168847,"I like this idea. To make a start on it, one c...",,G,6224,P13-Gr6224-168847,P13-G6224-168837,https://gowers.wordpress.com/2017/04/28/a-pote...,P13
4,gowers,https://gowers.wordpress.com,2017-04-29 10:10:00,https://gowers.wordpress.com/2017/04/28/a-pote...,168848,"Also, it looks to me as though the ties questi...",,G,6224,P13-Gr6224-168848,P13-G6224-168837,https://gowers.wordpress.com/2017/04/28/a-pote...,P13
...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,K,,2018-02-05 10:19:00,https://gowers.wordpress.com/2017/08/12/intran...,254336,I am very curious – is there some progress in ...,"[254361, 255532]",G,6345,P13-G6345-254336,,,P13
302,gowers,https://gowers.wordpress.com,2018-02-05 12:38:00,https://gowers.wordpress.com/2017/08/12/intran...,254361,I’ve been so busy on other projects that I hav...,,G,6345,P13-Gr6345-254361,P13-G6345-254336,https://gowers.wordpress.com/2017/08/12/intran...,P13
303,K,,2018-02-09 23:00:00,https://gowers.wordpress.com/2017/08/12/intran...,255532,"Wonderful, I am very much looking forward.",,G,6345,P13-Gr6345-255532,P13-G6345-254336,https://gowers.wordpress.com/2017/08/12/intran...,P13
304,K,,2018-04-30 20:25:00,https://gowers.wordpress.com/2017/08/12/intran...,291413,"In the last few months, did you consider wheth...",[292636],G,6345,P13-G6345-291413,,,P13


In [None]:
# Export the Polymath13 frame under the file name 'polymath13.json' via
# download_df (helper defined elsewhere in this notebook; presumably it
# serialises the frame and starts a Colab browser download — confirm against
# its definition).
download_df(df_poly13, 'polymath13.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath14

In [None]:
# Polymath14 (metrics of linear growth on the free group) threads on
# terrytao.wordpress.com, one URL per line.
poly14_urls = """
https://terrytao.wordpress.com/2017/12/16/bi-invariant-metrics-of-linear-growth-on-the-free-group/
https://terrytao.wordpress.com/2017/12/19/bi-invariant-metrics-of-linear-growth-on-the-free-group-ii/
https://terrytao.wordpress.com/2017/12/21/metrics-of-linear-growth-the-solution/
"""

# Pair each URL with its blog code, scrape all threads, then run the result
# through configure_columns under the project label 'P14'.
poly14_thread_list = parse_urls(poly14_urls)
df_poly14 = configure_columns('P14', scrape_project(poly14_thread_list))
df_poly14

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Tobias Fritz,http://personal-homepages.mis.mpg.de/fritz/,2017-12-16 23:42:00,https://terrytao.wordpress.com/2017/12/16/bi-i...,490066,"As a rather trivial observation, it may be wor...","[490070, 490074, 490099, 490076]",T,10356,P14-T10356-490066,,,P14
1,Alexander Shamov,https://www.facebook.com/app_scoped_user_id/10...,2017-12-16 23:52:00,https://terrytao.wordpress.com/2017/12/16/bi-i...,490068,> What is not clear to me is if one can keep a...,"[490070, 490074, 490099, 490076]",T,10356,P14-T10356-490068,,,P14
2,Lior Silberman,http://www.math.ubc.ca/~lior/,2017-12-17 00:34:00,https://terrytao.wordpress.com/2017/12/16/bi-i...,490070,Take the direct sum of the representation and ...,"[490074, 490099]",T,10356,P14-Tr10356-490070,P14-T10356-490068,https://terrytao.wordpress.com/2017/12/16/bi-i...,P14
3,Will Sawin,http://williamsawin.com,2017-12-17 01:43:00,https://terrytao.wordpress.com/2017/12/16/bi-i...,490074,"There is no such norm on the Heisenberg group,...",[490099],T,10356,P14-Tr10356-490074,P14-T10356-490070,https://terrytao.wordpress.com/2017/12/16/bi-i...,P14
4,Lior Silberman,http://www.math.ubc.ca/~lior/,2017-12-17 08:33:00,https://terrytao.wordpress.com/2017/12/16/bi-i...,490099,Great! This shows that every norm on a nilpote...,,T,10356,P14-Tr10356-490099,P14-T10356-490074,https://terrytao.wordpress.com/2017/12/16/bi-i...,P14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,sgadgil,http://math.iisc.ernet.in/~gadgil,2018-01-11 07:08:00,https://terrytao.wordpress.com/2017/12/21/metr...,491302,I would say yes to GT – the tradition is that ...,,T,10369,P14-Tr10369-491302,P14-T10369-491297,https://terrytao.wordpress.com/2017/12/21/metr...,P14
332,Tobias Fritz,http://personal-homepages.mis.mpg.de/fritz/,2018-01-11 09:10:00,https://terrytao.wordpress.com/2017/12/21/metr...,491306,"Sounds great, thank you!",,T,10369,P14-Tr10369-491306,P14-T10369-491297,https://terrytao.wordpress.com/2017/12/21/metr...,P14
333,Apoorva Khare,http://www.math.iisc.ac.in/~khare/,2018-01-11 09:09:00,https://terrytao.wordpress.com/2017/12/21/metr...,491305,My apologies for the multiple comments here: I...,[491320],T,10369,P14-T10369-491305,,,P14
334,Lior Silberman,https://www.math.ubc.ca/~lior/,2018-01-11 11:32:00,https://terrytao.wordpress.com/2017/12/21/metr...,491320,"One more typo: in the proof of Thm 4.3, we pul...",,T,10369,P14-Tr10369-491320,P14-T10369-491305,https://terrytao.wordpress.com/2017/12/21/metr...,P14


In [None]:
# Export the Polymath14 frame under the file name 'polymath14.json'.
# NOTE(review): download_df is defined elsewhere in this notebook; presumably
# it serialises the frame and triggers a Colab browser download - confirm.
download_df(df_poly14, 'polymath14.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath15

In [None]:
# Polymath15: the proposal post, the first through eleventh threads, and a
# final post whose slug is just the number 11075 - all on terrytao.wordpress.com.
poly15_urls = """
https://terrytao.wordpress.com/2018/01/24/polymath-proposal-upper-bounding-the-de-bruijn-newman-constant/
https://terrytao.wordpress.com/2018/01/27/polymath15-first-thread-computing-h_t-asymptotics-and-dynamics-of-zeroes/
https://terrytao.wordpress.com/2018/02/02/polymath15-second-thread-generalising-the-riemann-siegel-approximate-functional-equation/
https://terrytao.wordpress.com/2018/02/12/polymath15-third-thread-computing-and-approximating-h_t/
https://terrytao.wordpress.com/2018/02/24/polymath15-fourth-thread-closing-in-on-the-test-problem/
https://terrytao.wordpress.com/2018/03/02/polymath15-fifth-thread-finishing-off-the-test-problem/
https://terrytao.wordpress.com/2018/03/18/polymath15-sixth-thread-the-test-problem-and-beyond/
https://terrytao.wordpress.com/2018/03/28/polymath15-seventh-thread-going-below-0-48/
https://terrytao.wordpress.com/2018/04/17/polymath15-eighth-thread-going-below-0-28/
https://terrytao.wordpress.com/2018/05/04/polymath15-ninth-thread-going-below-0-22/
https://terrytao.wordpress.com/2018/09/06/polymath15-tenth-thread-numerics-update/
https://terrytao.wordpress.com/2018/12/28/polymath-15-eleventh-thread-writing-up-the-results-and-exploring-negative-t/
https://terrytao.wordpress.com/2019/04/30/11075/
"""

# parse_urls pairs every URL with its one-letter blog code.
poly15_thread_list = parse_urls(poly15_urls)

# Scrape every thread, then hand the result to configure_columns together
# with the 'P15' project tag; the bare name on the last line renders the frame.
df_poly15 = configure_columns('P15', scrape_project(poly15_thread_list))
df_poly15

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Jair,,2018-01-24 17:05:00,https://terrytao.wordpress.com/2018/01/24/poly...,491743,There seems to be a missing factor in the give...,[491746],T,10419,P15-T10419-491743,,,P15
1,Anonymous,,2018-01-24 17:44:00,https://terrytao.wordpress.com/2018/01/24/poly...,491746,"Yes, t is missing",,T,10419,P15-Tr10419-491746,P15-T10419-491743,https://terrytao.wordpress.com/2018/01/24/poly...,P15
2,Anonymous,,2018-01-24 20:41:00,https://terrytao.wordpress.com/2018/01/24/poly...,491750,the differential inequality (1) can be written...,"[491759, 491788, 491794, 491797, 491834, 49179...",T,10419,P15-T10419-491750,,,P15
3,Anonymous,,2018-01-25 02:27:00,https://terrytao.wordpress.com/2018/01/24/poly...,491753,Well from the perspective of the numerical tea...,"[491759, 491788, 491794, 491797, 491834, 49179...",T,10419,P15-T10419-491753,,,P15
4,Anonymous,,2018-01-25 05:46:00,https://terrytao.wordpress.com/2018/01/24/poly...,491759,Although a precise horizontal(!) localization ...,"[491788, 491794, 491797, 491834, 491795, 49179...",T,10419,P15-Tr10419-491759,P15-T10419-491753,https://terrytao.wordpress.com/2018/01/24/poly...,P15
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1198,Anonymous,,2020-04-22 22:26:00,https://terrytao.wordpress.com/2019/04/30/1107...,554539,“Just a small comment” :),[554639],T,11075,P15-Tr11075-554539,P15-T11075-554457,https://terrytao.wordpress.com/2019/04/30/1107...,P15
1199,goingtoinfinity,http://mariokrenn.wordpress.com,2020-04-22 23:40:00,https://terrytao.wordpress.com/2019/04/30/1107...,554556,Are there any ways to confirm the verification...,[554639],T,11075,P15-Tr11075-554556,P15-T11075-554457,https://terrytao.wordpress.com/2019/04/30/1107...,P15
1200,Terence Tao,http://www.math.ucla.edu/~tao,2020-04-23 07:34:00,https://terrytao.wordpress.com/2019/04/30/1107...,554639,This is a good question. I think with our curr...,,T,11075,P15-Tr11075-554639,P15-T11075-554556,https://terrytao.wordpress.com/2019/04/30/1107...,P15
1201,Rudolph,,2020-05-01 10:39:00,https://terrytao.wordpress.com/2019/04/30/1107...,556524,From the paper: “(…)The next entry in Table 1 ...,,T,11075,P15-Tr11075-556524,P15-T11075-554457,https://terrytao.wordpress.com/2019/04/30/1107...,P15


In [None]:
# Export the Polymath15 frame under the file name 'polymath15.json'.
# NOTE(review): download_df is defined elsewhere in this notebook; presumably
# it serialises the frame and triggers a Colab browser download - confirm.
download_df(df_poly15, 'polymath15.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Polymath16

In [None]:
# Check (disabled): scrape a single Dustin Mixon thread on its own, using the
# 'D' blog code, and display the result. Uncomment the two lines below to run.
# df_dustin = scrape_thread('D', 'https://dustingmixon.wordpress.com/2018/04/14/polymath16-first-thread-simplifying-de-greys-graph/')
# df_dustin

In [None]:
# Polymath16: the first through seventeenth threads, one URL per line, all on
# dustingmixon.wordpress.com.
poly16_urls = """
https://dustingmixon.wordpress.com/2018/04/14/polymath16-first-thread-simplifying-de-greys-graph/
https://dustingmixon.wordpress.com/2018/04/22/polymath16-second-thread-what-does-it-take-to-be-5-chromatic/
https://dustingmixon.wordpress.com/2018/05/01/polymath16-third-thread-is-6-chromatic-within-reach/
https://dustingmixon.wordpress.com/2018/05/05/polymath16-fourth-thread-applying-the-probabilistic-method/
https://dustingmixon.wordpress.com/2018/05/10/polymath16-fifth-thread-human-verifiable-proofs/
https://dustingmixon.wordpress.com/2018/05/29/polymath16-sixth-thread-wrestling-with-infinite-graphs/
https://dustingmixon.wordpress.com/2018/06/16/polymath16-seventh-thread-upper-bounds/
https://dustingmixon.wordpress.com/2018/06/24/polymath16-eighth-thread-more-upper-bounds/
https://dustingmixon.wordpress.com/2018/07/02/polymath16-ninth-thread-searching-for-a-6-coloring/
https://dustingmixon.wordpress.com/2018/08/28/polymath16-tenth-thread-open-sat-instances/
https://dustingmixon.wordpress.com/2018/09/14/polymath16-eleventh-thread-chromatic-numbers-of-planar-sets/
https://dustingmixon.wordpress.com/2019/03/23/polymath16-twelfth-thread-year-in-review-and-future-plans/
https://dustingmixon.wordpress.com/2019/07/08/polymath16-thirteenth-thread-bumping-the-deadline/
https://dustingmixon.wordpress.com/2019/08/05/polymath16-fourteenth-thread-automated-graph-minimization/
https://dustingmixon.wordpress.com/2019/12/12/polymath16-fifteenth-thread-writing-the-paper-and-chasing-down-loose-ends/
https://dustingmixon.wordpress.com/2020/05/11/polymath16-sixteenth-thread-writing-the-paper-and-chasing-down-loose-ends-ii/
https://dustingmixon.wordpress.com/2021/02/01/polymath16-seventeenth-thread-declaring-victory/
"""
# parse_urls pairs every URL with its one-letter blog code; the scrape itself
# happens in the next cell.
poly16_thread_list = parse_urls(poly16_urls)

In [None]:
# Scrape every Polymath16 thread from the list built in the previous cell,
# then hand the result to configure_columns together with the 'P16' project
# tag; the bare name on the last line renders the frame.
df_poly16 = configure_columns('P16', scrape_project(poly16_thread_list))
df_poly16

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,ag24ag24,,2018-04-14 14:11:00,https://dustingmixon.wordpress.com/2018/04/14/...,3812,Many thanks Dustin! I am delighted to say that...,"[3917, 3918, 3919, 3922, 3923, 3924, 4269, 392...",D,4891,P16-D4891-3812,,,P16
1,Warren D Smith,http://rangevoting.org,2018-04-19 18:56:00,https://dustingmixon.wordpress.com/2018/04/14/...,3917,Some remarks about the Hadwiger-Nelson problem...,"[3918, 3919, 3922, 3923, 3924, 4269]",D,4891,P16-Dr4891-3917,P16-D4891-3812,https://dustingmixon.wordpress.com/2018/04/14/...,P16
2,Warren D Smith,http://rangevoting.org,2018-04-19 19:02:00,https://dustingmixon.wordpress.com/2018/04/14/...,3918,"Hey, what the hell. Your worthless blog softwa...",,D,4891,P16-Dr4891-3918,P16-D4891-3917,https://dustingmixon.wordpress.com/2018/04/14/...,P16
3,Warren D Smith,http://rangevoting.org,2018-04-19 19:04:00,https://dustingmixon.wordpress.com/2018/04/14/...,3919,Let me see if it actually says it now… –it did...,,D,4891,P16-Dr4891-3919,P16-D4891-3917,https://dustingmixon.wordpress.com/2018/04/14/...,P16
4,Lior Silberman,http://www.math.ubc.ca/~lior/,2018-04-19 22:13:00,https://dustingmixon.wordpress.com/2018/04/14/...,3922,The problem is probably with angle brackets: H...,,D,4891,P16-Dr4891-3922,P16-D4891-3917,https://dustingmixon.wordpress.com/2018/04/14/...,P16
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2561,Jaan Parts,,2023-02-01 09:20:00,https://dustingmixon.wordpress.com/2021/02/01/...,48652,The last sequence is described by the formula ...,,D,5088,P16-Dr5088-48652,P16-D5088-48625,https://dustingmixon.wordpress.com/2021/02/01/...,P16
2562,Jaan Parts,,2023-02-01 14:41:00,https://dustingmixon.wordpress.com/2021/02/01/...,48678,Tom’s formula for a 19-vertex generating graph...,,D,5088,P16-Dr5088-48678,P16-D5088-48625,https://dustingmixon.wordpress.com/2021/02/01/...,P16
2563,Jaan Parts,,2023-02-01 14:56:00,https://dustingmixon.wordpress.com/2021/02/01/...,48679,For a 31-vertex generating graph on five wheel...,,D,5088,P16-Dr5088-48679,P16-D5088-48625,https://dustingmixon.wordpress.com/2021/02/01/...,P16
2564,Jaan Parts,,2023-02-04 06:38:00,https://dustingmixon.wordpress.com/2021/02/01/...,48987,"So, consider families of graphs of the form $G...",[48993],D,5088,P16-Dr5088-48987,P16-D5088-48587,https://dustingmixon.wordpress.com/2021/02/01/...,P16


In [None]:
# Export the Polymath16 frame under the file name 'polymath16.json'.
# NOTE(review): download_df is defined elsewhere in this notebook; presumably
# it serialises the frame and triggers a Colab browser download - confirm.
download_df(df_poly16, 'polymath16.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Mini1

In [None]:
# Mini-polymath 1 (IMO 2009 Q6): the original thread and its continuation,
# both on terrytao.wordpress.com.
mini1_urls = """
https://terrytao.wordpress.com/2009/07/20/imo-2009-q6-as-a-mini-polymath-project/
https://terrytao.wordpress.com/2009/07/21/imo-2009-q6-mini-polymath-project-cont/
"""

# parse_urls pairs every URL with its one-letter blog code.
mini1_thread_list = parse_urls(mini1_urls)

# Scrape both threads, then hand the result to configure_columns together
# with the 'M1' project tag; the bare name on the last line renders the frame.
df_mini1 = configure_columns('M1', scrape_project(mini1_thread_list))
df_mini1

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,jens,,2009-07-21 05:35:00,https://terrytao.wordpress.com/2009/07/20/imo-...,40377,"140. to 139 oh yes, I see the problem, thanks",,T,2495,M1-T2495-40377,,,M1
1,David Escott,,2009-07-21 07:14:00,https://terrytao.wordpress.com/2009/07/20/imo-...,40384,141. I’m suspicious that Terry is removing pos...,,T,2495,M1-T2495-40384,,,M1
2,David Escott,,2009-07-21 07:15:00,https://terrytao.wordpress.com/2009/07/20/imo-...,40385,141a. Given a set where the Grasshopper can re...,,T,2495,M1-T2495-40385,,,M1
3,David Escott,,2009-07-21 07:15:00,https://terrytao.wordpress.com/2009/07/20/imo-...,40386,141b. Speyer (2) mentioned a minimal size need...,,T,2495,M1-T2495-40386,,,M1
4,David Escott,,2009-07-21 07:16:00,https://terrytao.wordpress.com/2009/07/20/imo-...,40388,141c. Final observation. The beginning an endi...,,T,2495,M1-T2495-40388,,,M1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,ar,,2009-07-21 13:16:00,https://terrytao.wordpress.com/2009/07/21/imo-...,40481,"230. @sw (201). Regarding the graph approach, ...",,T,2520,M1-T2520-40481,,,M1
332,Mark Bennet,,2009-07-21 13:17:00,https://terrytao.wordpress.com/2009/07/21/imo-...,40482,223 David – my equivalence relation can reduce...,,T,2520,M1-T2520-40482,,,M1
333,ar,,2009-07-21 13:22:00,https://terrytao.wordpress.com/2009/07/21/imo-...,40483,232. Let me write the graph restatment again. ...,,T,2520,M1-T2520-40483,,,M1
334,David Speyer,,2009-07-21 14:08:00,https://terrytao.wordpress.com/2009/07/21/imo-...,40484,"233 ar: If I understand you correctly, what yo...",,T,2520,M1-T2520-40484,,,M1


In [None]:
# Export the Mini1 frame under the file name 'mini1.json'.
# NOTE(review): download_df is defined elsewhere in this notebook; presumably
# it serialises the frame and triggers a Colab browser download - confirm.
download_df(df_mini1, 'mini1.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Mini2

In [None]:
# Mini-polymath 2 lives in a single polymathprojects.org thread, so it is
# scraped directly with scrape_thread; 'P' is the blog code parse_urls gives
# to any URL that is not on one of the listed wordpress blogs.
# NOTE(review): this URL uses http:// while the Mini3/Mini4 URLs use https://,
# and the hrefs in the rendered output carry the http scheme too - confirm
# whether that is intended.
df_mini2 = configure_columns(
    'M2',
    scrape_thread('P', 'http://polymathprojects.org/2010/07/08/minipolymath2-project-imo-2010-q5/'),
)
df_mini2

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Aaron Hill,,2010-07-08 16:13:00,http://polymathprojects.org/2010/07/08/minipol...,2110,By the first rule we can say that a coin in B_...,[2113],P,175,M2-P175-2110,,,M2
1,Greg,,2010-07-08 16:19:00,http://polymathprojects.org/2010/07/08/minipol...,2113,"– more precisely, the total worth goes up when...",,P,175,M2-Pr175-2113,M2-P175-2110,http://polymathprojects.org/2010/07/08/minipol...,M2
2,oz,,2010-07-08 16:14:00,http://polymathprojects.org/2010/07/08/minipol...,2111,Trivial observation: whenever the left-most bo...,"[2116, 2121, 2117, 2119, 2127, 2144]",P,175,M2-P175-2111,,,M2
3,Aaron Hill,,2010-07-08 16:35:00,http://polymathprojects.org/2010/07/08/minipol...,2116,This seems to imply that we cannot get arbitra...,[2121],P,175,M2-Pr175-2116,M2-P175-2111,http://polymathprojects.org/2010/07/08/minipol...,M2
4,Alexandr Kazda,,2010-07-08 16:45:00,http://polymathprojects.org/2010/07/08/minipol...,2121,We can probably construct some sort of recursi...,,P,175,M2-Pr175-2121,M2-P175-2116,http://polymathprojects.org/2010/07/08/minipol...,M2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,Mark Bennet,,2010-07-12 16:42:00,http://polymathprojects.org/2010/07/08/minipol...,2255,"I think I can see the pattern here. [1,1,1,1,1...",,P,175,M2-Pr175-2255,M2-P175-2253,http://polymathprojects.org/2010/07/08/minipol...,M2
116,John,,2010-07-11 02:01:00,http://polymathprojects.org/2010/07/08/minipol...,2247,Maybe there’s a a way to solve using conserved...,[2257],P,175,M2-P175-2247,,,M2
117,Mark Bennet,,2010-07-12 22:02:00,http://polymathprojects.org/2010/07/08/minipol...,2257,Every extra place to the left adds an extra le...,,P,175,M2-Pr175-2257,M2-P175-2247,http://polymathprojects.org/2010/07/08/minipol...,M2
118,Mark Bennet,,2010-07-16 17:16:00,http://polymathprojects.org/2010/07/08/minipol...,2263,There are various ways of getting a recursion ...,,P,175,M2-P175-2263,,,M2


In [None]:
# Export the Mini2 frame under the file name 'mini2.json'.
# NOTE(review): download_df is defined elsewhere in this notebook; presumably
# it serialises the frame and triggers a Colab browser download - confirm.
download_df(df_mini2, 'mini2.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Mini3

In [None]:
# Mini-polymath 3 lives in a single polymathprojects.org thread, so it is
# scraped directly with scrape_thread; 'P' is the blog code parse_urls gives
# to any URL that is not on one of the listed wordpress blogs.
df_mini3 = configure_columns(
    'M3',
    scrape_thread('P', 'https://polymathprojects.org/2011/07/19/minipolymath3-project-2011-imo/'),
)
df_mini3

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Richard McCart,,2011-08-22 03:52:00,https://polymathprojects.org/2011/07/19/minipo...,3501,Could you start off with a random point in the...,,P,249,M3-P249-3501,,,M3
1,Gal,,2011-07-19 20:07:00,https://polymathprojects.org/2011/07/19/minipo...,3305,Connecting the dots: At the point where the pi...,"[3323, 3341, 3368, 3399, 3332, 3342, 3348]",P,249,M3-P249-3305,,,M3
2,Garf,,2011-07-19 20:23:00,https://polymathprojects.org/2011/07/19/minipo...,3323,Nice. We need only to consider the times when ...,"[3341, 3368, 3399]",P,249,M3-Pr249-3323,M3-P249-3305,https://polymathprojects.org/2011/07/19/minipo...,M3
3,Gal,,2011-07-19 20:37:00,https://polymathprojects.org/2011/07/19/minipo...,3341,Isn’t there always a cycle that spans all the ...,"[3368, 3399]",P,249,M3-Pr249-3341,M3-P249-3323,https://polymathprojects.org/2011/07/19/minipo...,M3
4,Gal,,2011-07-19 20:56:00,https://polymathprojects.org/2011/07/19/minipo...,3368,"For example, the restriction on how the next p...",[3399],P,249,M3-Pr249-3368,M3-P249-3341,https://polymathprojects.org/2011/07/19/minipo...,M3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,twio,http://twio.wordpress.com,2012-02-14 13:24:00,https://polymathprojects.org/2011/07/19/minipo...,5095,"Just encountered this site, and this problem, ...",,P,249,M3-P249-5095,,,M3
142,mkhida,http://mkhida.wordpress.com,2012-03-28 14:09:00,https://polymathprojects.org/2011/07/19/minipo...,5337,"sorry for my English,I have just a remark on t...",,P,249,M3-P249-5337,,,M3
143,brandon,,2012-04-02 05:21:00,https://polymathprojects.org/2011/07/19/minipo...,5356,Lemma 1: Let set A have this property Let T be...,[5359],P,249,M3-P249-5356,,,M3
144,twio,http://twio.wordpress.com,2012-04-02 13:20:00,https://polymathprojects.org/2011/07/19/minipo...,5359,brandon wrote: Let set A have this property Le...,,P,249,M3-Pr249-5359,M3-P249-5356,https://polymathprojects.org/2011/07/19/minipo...,M3


In [None]:
# Export the Mini3 frame under the file name 'mini3.json'.
# NOTE(review): download_df is defined elsewhere in this notebook; presumably
# it serialises the frame and triggers a Colab browser download - confirm.
download_df(df_mini3, 'mini3.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Mini4

In [None]:
# Mini-polymath 4 lives in a single polymathprojects.org thread, so it is
# scraped directly with scrape_thread; 'P' is the blog code parse_urls gives
# to any URL that is not on one of the listed wordpress blogs.
df_mini4 = configure_columns(
    'M4',
    scrape_thread('P', 'https://polymathprojects.org/2012/07/12/minipolymath4-project-imo-2012-q3/'),
)
df_mini4

Unnamed: 0,author,author-href,time,comment-href,comment-id,content,child-ids,blog,post-id,id,in-reply-to,in-reply-to-href,project-id
0,Bob,,2012-07-12 22:11:00,https://polymathprojects.org/2012/07/12/minipo...,7557,Obvious observations: It seems for part 1 we h...,,P,304,M4-P304-7557,,,M4
1,Mihai Nica,http://probabilitynica.wordpress.com,2012-07-12 22:17:00,https://polymathprojects.org/2012/07/12/minipo...,7558,The fact that player A has to choose the numbe...,,P,304,M4-P304-7558,,,M4
2,Jaakko,,2012-07-12 22:19:00,https://polymathprojects.org/2012/07/12/minipo...,7559,Are there any results from Ramsey theory or re...,,P,304,M4-P304-7559,,,M4
3,Jon,,2012-07-12 22:22:00,https://polymathprojects.org/2012/07/12/minipo...,7560,Obvious: If we choose sets S_p to be of the fo...,"[7570, 7579]",P,304,M4-P304-7560,,,M4
4,Vladimir Nesov,,2012-07-12 22:34:00,https://polymathprojects.org/2012/07/12/minipo...,7570,"More generally, for any partition of some set ...",[7579],P,304,M4-Pr304-7570,M4-P304-7560,https://polymathprojects.org/2012/07/12/minipo...,M4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,akash chayan,,2012-07-13 19:53:00,https://polymathprojects.org/2012/07/12/minipo...,7690,The game can be re-formulated in an equivalent...,[7696],P,304,M4-P304-7690,,,M4
77,Gagik Amirkhanyan,http://gagika.wordpress.com,2012-07-13 20:14:00,https://polymathprojects.org/2012/07/12/minipo...,7696,"I think it’s correct solution, just in the def...",,P,304,M4-Pr304-7696,M4-P304-7690,https://polymathprojects.org/2012/07/12/minipo...,M4
78,Terence Tao,http://www.math.ucla.edu/~tao,2012-07-13 19:56:00,https://polymathprojects.org/2012/07/12/minipo...,7691,"Dear all, As this thread is becoming quite ful...",,P,304,M4-P304-7691,,,M4
79,prateekchandrajha,http://prateekchandrajha.wordpress.com,2012-07-30 22:58:00,https://polymathprojects.org/2012/07/12/minipo...,8392,Reblogged this on Wikipedia Afficianado and co...,,P,304,M4-P304-8392,,,M4


In [None]:
# Export the Mini4 frame under the file name 'mini4.json'.
# NOTE(review): download_df is defined elsewhere in this notebook; presumably
# it serialises the frame and triggers a Colab browser download - confirm.
download_df(df_mini4, 'mini4.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>