### Imports

In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
from tqdm import tqdm

In [205]:
tqdm.pandas()

### Parse the Posts.xml. 

Note: This step may take around 20-30 mins

In [2]:
tree = ET.parse('Posts.xml')

### Extract necessary fields from xml

In [3]:
root = tree.getroot()

In [4]:
pt_title = []
pt_post_id = []
pt_post_type_id = []
pt_accepted_answer_id = []
pt_creation_date = []
pt_score = []
pt_body = []
pt_tags = []
pt_parent_id = []

# Iterate through the elements in the XML file
for post in tqdm(root):
    # Extract the necessary information
    post_id = post.get('Id')
    post_type_id = post.get('PostTypeId')
    accepted_answer_id = post.get('AcceptedAnswerId')
    creation_date = post.get('CreationDate')
    score = post.get('Score')
    body = post.get('Body')
    tag = post.get("Tags")
    title = post.get("Title")
    parent_id = post.get("ParentId")
    
    # Print the extracted information
#     if tag and "pytorch" in tag:
#         print('Post ID:', post_id)
#         print('Post Type ID:', post_type_id)
#         print('Accepted Answer ID:', accepted_answer_id)
#         print('Creation Date:', creation_date)
#         print('Score:', score)
#         print('Body:', body)
#         print("Tags: ", tag)
    pt_post_id.append(post_id)
    pt_post_type_id.append(post_type_id)
    pt_accepted_answer_id.append(accepted_answer_id)
    pt_creation_date.append(creation_date)
    pt_score.append(score)
    pt_body.append(body)
    pt_tags.append(tag)
    pt_title.append(title)
    pt_parent_id.append(parent_id)
    
    

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57721548/57721548 [02:40<00:00, 359730.54it/s]


In [5]:
len(pt_post_id)

57721548

In [6]:
questions_df = pd.DataFrame({
    "pt_post_id" : pt_post_id,
    "pt_post_type_id": pt_post_type_id,
    "pt_accepted_answer_id": pt_accepted_answer_id,
    "pt_creation_date": pt_creation_date,
    "pt_score": pt_score,
    "pt_title": pt_title,
    "pt_body": pt_body,
    "pt_tags": pt_tags,
    "pt_parent_id": pt_parent_id
})

In [7]:
questions_df.shape

(57721548, 9)

In [8]:
questions_df[questions_df.pt_post_type_id == "1"].shape

(23273009, 9)

In [None]:
questions_df.to_csv("stack_overflow_dataset_dump.csv", index=False)

### Query and extract pytorch posts

In [9]:
questions_df["pt_tags"] = questions_df["pt_tags"].str.lower()

In [10]:
pt_questions_df = questions_df[(questions_df["pt_tags"].notna()) & (questions_df["pt_tags"].str.contains("pytorch"))]

In [11]:
pt_questions_df.shape

(19837, 9)

In [12]:
pt_questions_df.to_csv("pytorch_questions_dump.csv", index=False)

In [13]:
pt_questions_df

Unnamed: 0,pt_post_id,pt_post_type_id,pt_accepted_answer_id,pt_creation_date,pt_score,pt_title,pt_body,pt_tags,pt_parent_id
27837861,34750268,1,34762233,2016-01-12T17:36:25.473,9,Extracting the top-k value-indices from a 1-D ...,<p>Given a 1-D tensor in Torch (<code>torch.Te...,<python><lua><pytorch><torch>,
30769673,38543850,1,38676842,2016-07-23T16:15:43.967,40,How to Display Custom Images in Tensorboard (e...,"<p>The <a href=""https://github.com/tensorflow/...",<python><tensorflow><matplotlib><pytorch><tens...,
33236300,41767005,1,43824857,2017-01-20T15:22:08.063,11,Python wheels: cp27mu not supported,"<p>I'm trying to install pytorch (<a href=""htt...",<python><linux><unicode><pytorch>,
33275551,41818618,1,,2017-01-24T01:34:19.060,0,PyTorch doesn't import after installing Anaconda,<p>I just installed PyTorch after installing A...,<macos><python-3.x><ipython><anaconda><pytorch>,
33307877,41861354,1,54261158,2017-01-25T20:45:35.297,8,Loading Torch7 trained models (.t7) in PyTorch,<p>I am using Torch7 library for implementing ...,<python><lua><pytorch><torch><pre-trained-model>,
...,...,...,...,...,...,...,...,...,...
57720026,74679922,1,,2022-12-04T18:58:11.630,0,BGR to RGB for CUB_200 images by Image.split(),<p>I am creating a PyTorch dataset and dataloa...,<python><image><pytorch><pytorch-dataloader>,
57720033,74679929,1,,2022-12-04T18:58:43.193,0,Neural Networks Extending Learning Domain,<p>I have a simple function <strong>f</strong>...,<python><pytorch>,
57720116,74680028,1,,2022-12-04T19:10:58.143,-3,Is RTX 3060 compatible with PyTorch,"<p><a href=""https://i.stack.imgur.com/Kutov.jp...",<pytorch><gpu>,
57720451,74680391,1,,2022-12-04T19:55:45.433,0,Pytorch: how is FSDP optimizer state sharding ...,<p>I've been to trying understand how pytorch ...,<python><pytorch>,


### Lets split the dataframe into two parts

1. Questions which has accepted answer
2. Questions which doesn't have accepted answers

In [81]:
questions_with_accepted_df = pt_questions_df[pt_questions_df.pt_accepted_answer_id.notna()]
questions_without_accepted_df = pt_questions_df[pt_questions_df.pt_accepted_answer_id.isnull()]

In [82]:
assert pt_questions_df.shape[0] == (questions_with_accepted_df.shape[0] + questions_without_accepted_df.shape[0])

### For Each question in questions_with_accepted_df, lets query the database and find out the accepted answer post. 

To find the accepted answer from db, we need to query `AcceptedAnswerId` from question and search for `Id` as `AcceptedAnswerId` and `PostTypeId` == "2"

In [83]:
questions_with_accepted_df.shape

(7558, 9)

In [84]:
# questions_with_accepted_df.pt_post_id


pt_answer_df = questions_df[questions_df.pt_parent_id.isin(questions_with_accepted_df.pt_post_id)]

In [85]:
def concat_answers(row):
    post_id = row["pt_post_id"]
    
    answers = pt_answer_df[pt_answer_df["pt_parent_id"] == post_id]
    
    answers_body = answers["pt_body"]
    
    concat_answer = ""
    for text in answers_body:
        concat_answer += text
        
    return concat_answer
        

In [86]:
questions_with_accepted_df["context"] = questions_with_accepted_df.apply(lambda x: concat_answers(x), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  questions_with_accepted_df["context"] = questions_with_accepted_df.apply(lambda x: concat_answers(x), axis=1)


In [87]:
questions_with_accepted_df.shape

(7558, 10)

In [88]:
accepted_answer_df = questions_df[questions_df.pt_post_id.isin(questions_with_accepted_df.pt_accepted_answer_id)]

In [89]:
accepted_answer_df.shape

(7558, 9)

In [90]:
questions_with_accepted_df.shape

(7558, 10)

In [91]:
accepted_answer_df["pt_accepted_answer_id"] = accepted_answer_df["pt_post_id"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  accepted_answer_df["pt_accepted_answer_id"] = accepted_answer_df["pt_post_id"]


In [92]:
accepted_answer_df.rename({"pt_body": "pt_answer"}, inplace=True, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  accepted_answer_df.rename({"pt_body": "pt_answer"}, inplace=True, axis=1)


In [93]:
accepted_answer_df.columns

Index(['pt_post_id', 'pt_post_type_id', 'pt_accepted_answer_id',
       'pt_creation_date', 'pt_score', 'pt_title', 'pt_answer', 'pt_tags',
       'pt_parent_id'],
      dtype='object')

In [94]:
questions_with_accepted_df = questions_with_accepted_df.merge(accepted_answer_df[["pt_accepted_answer_id", "pt_answer"]], how="inner", on=["pt_accepted_answer_id"])

In [95]:
questions_with_accepted_df.shape

(7558, 11)

In [96]:
# assert questions_with_accepted_df.shape[0] == accepted_answer_df.shape[0]

In [97]:
# accepted_answer_body_list = accepted_answer_df.pt_body.tolist()

In [98]:
# questions_with_accepted_df["pt_answer"] = accepted_answer_body_list

In [99]:
# questions_with_accepted_df.shape

In [100]:
questions_with_accepted_df["question"] = questions_with_accepted_df["pt_title"] + " " + questions_with_accepted_df["pt_body"]

In [101]:
questions_with_accepted_df[["pt_title", "context", "pt_answer"]][:5]

Unnamed: 0,pt_title,context,pt_answer
0,Extracting the top-k value-indices from a 1-D ...,<p>Just loop through the tensor and run your c...,"<p>As of pull request <a href=""https://github...."
1,How to Display Custom Images in Tensorboard (e...,<p>It is quite easy to do if you have the imag...,<p>It is quite easy to do if you have the imag...
2,Python wheels: cp27mu not supported,<p>This is exactly that. \nRecompile python un...,<p>This is exactly that. \nRecompile python un...
3,Loading Torch7 trained models (.t7) in PyTorch,<p>The correct function is <code>load_lua</cod...,<p>As of PyTorch 1.0 <code>torch.utils.seriali...
4,PyTorch: How to use DataLoaders for custom Dat...,"<p>Yes, that is possible. Just create the obje...","<p>Yes, that is possible. Just create the obje..."


In [103]:
questions_with_accepted_df

Unnamed: 0,pt_post_id,pt_post_type_id,pt_accepted_answer_id,pt_creation_date,pt_score,pt_title,pt_body,pt_tags,pt_parent_id,context,pt_answer,question
0,34750268,1,34762233,2016-01-12T17:36:25.473,9,Extracting the top-k value-indices from a 1-D ...,<p>Given a 1-D tensor in Torch (<code>torch.Te...,<python><lua><pytorch><torch>,,<p>Just loop through the tensor and run your c...,"<p>As of pull request <a href=""https://github....",Extracting the top-k value-indices from a 1-D ...
1,38543850,1,38676842,2016-07-23T16:15:43.967,40,How to Display Custom Images in Tensorboard (e...,"<p>The <a href=""https://github.com/tensorflow/...",<python><tensorflow><matplotlib><pytorch><tens...,,<p>It is quite easy to do if you have the imag...,<p>It is quite easy to do if you have the imag...,How to Display Custom Images in Tensorboard (e...
2,41767005,1,43824857,2017-01-20T15:22:08.063,11,Python wheels: cp27mu not supported,"<p>I'm trying to install pytorch (<a href=""htt...",<python><linux><unicode><pytorch>,,<p>This is exactly that. \nRecompile python un...,<p>This is exactly that. \nRecompile python un...,Python wheels: cp27mu not supported <p>I'm try...
3,41861354,1,54261158,2017-01-25T20:45:35.297,8,Loading Torch7 trained models (.t7) in PyTorch,<p>I am using Torch7 library for implementing ...,<python><lua><pytorch><torch><pre-trained-model>,,<p>The correct function is <code>load_lua</cod...,<p>As of PyTorch 1.0 <code>torch.utils.seriali...,Loading Torch7 trained models (.t7) in PyTorch...
4,41924453,1,42054194,2017-01-29T18:31:24.687,65,PyTorch: How to use DataLoaders for custom Dat...,<p>How to make use of the <code>torch.utils.da...,<python><torch><pytorch>,,"<p>Yes, that is possible. Just create the obje...","<p>Yes, that is possible. Just create the obje...",PyTorch: How to use DataLoaders for custom Dat...
...,...,...,...,...,...,...,...,...,...,...,...,...
7553,74644993,1,74645308,2022-12-01T16:18:26.423,0,FashionMNIST Dataset not transforming to Tensor,<p>Trying to calculate the mean and standard d...,<python><pytorch><tensor><mnist><torchvision>,,<p><code>datasets.FashionMNIST</code> returns ...,<p><code>datasets.FashionMNIST</code> returns ...,FashionMNIST Dataset not transforming to Tenso...
7554,74650187,1,74651024,2022-12-02T02:15:48.957,0,conv2d() received an invalid combination of ar...,"<p>After resnet convolution, I want to further...",<pytorch>,,<p>The error is likely to be caused by the fol...,<p>The error is likely to be caused by the fol...,conv2d() received an invalid combination of ar...
7555,74654315,1,74654375,2022-12-02T10:32:00.940,2,Load and Retrain PyTorch model…,<p><strong>Hi all!</strong> Can you help me? ...,<python><pytorch><artificial-intelligence>,,<p>To retrain a PyTorch model with just a .pt ...,<p>To retrain a PyTorch model with just a .pt ...,Load and Retrain PyTorch model… <p><strong>Hi ...
7556,74664418,1,74664458,2022-12-03T06:17:15.340,1,"In pytorch torchscript, how to define mutiple ...",<p>Have a torch model as follow :</p>\n<pre><c...,<pytorch><onnx><onnxruntime><torchscript>,,"<p>Yes, you can define multiple entry points i...","<p>Yes, you can define multiple entry points i...","In pytorch torchscript, how to define mutiple ..."


In [104]:
questions_with_accepted_df.to_csv("questions_with_accepted_df_updated.csv", index=False)

### For questions without accepted answers

#### From the whole dump lets extract the rows which contains only answers

In [105]:
answers_df = questions_df[questions_df.pt_post_type_id == "2"]

In [106]:
answers_df.shape

(34337401, 9)

#### Extract the rows where the parent id matches the post id of not accepted df

In [161]:
questions_without_accepted_df.shape

(12279, 9)

In [162]:
answers_not_accepted_df = answers_df[answers_df.pt_parent_id.isin(questions_without_accepted_df.pt_post_id)]

In [163]:
answers_not_accepted_df.shape

(8596, 9)

In [164]:
answers_not_accepted_df.pt_parent_id.nunique()

6638

In [165]:
answers_not_accepted_df.sort_values(by=["pt_score"], inplace=True, ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  answers_not_accepted_df.sort_values(by=["pt_score"], inplace=True, ascending=False)


In [166]:
answers_not_accepted_df["pt_score"] = answers_not_accepted_df["pt_score"].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  answers_not_accepted_df["pt_score"] = answers_not_accepted_df["pt_score"].astype(int)


In [167]:
parent_id_with_max_score_df = answers_not_accepted_df[answers_not_accepted_df.pt_score>=1].groupby("pt_parent_id", sort=False)["pt_score"].max().reset_index()



In [168]:
parent_id_with_max_score_df.pt_parent_id.nunique()

3205

In [169]:
answers_not_accepted_df = answers_not_accepted_df.merge(parent_id_with_max_score_df, on=['pt_parent_id', 'pt_score'], how="inner")[["pt_parent_id", "pt_body"]]

In [170]:
answers_not_accepted_df.rename(columns={"pt_parent_id": "pt_post_id", "pt_body": "pt_answer"}, inplace=True)

In [171]:
answers_not_accepted_df.shape

(3362, 2)

In [172]:
answers_without_accepted_df = questions_without_accepted_df.merge(answers_not_accepted_df, on="pt_post_id", how="inner")

In [173]:
answers_without_accepted_df.shape

(3362, 10)

In [174]:
questions_with_accepted_df.columns

Index(['pt_post_id', 'pt_post_type_id', 'pt_accepted_answer_id',
       'pt_creation_date', 'pt_score', 'pt_title', 'pt_body', 'pt_tags',
       'pt_parent_id', 'context', 'pt_answer', 'question'],
      dtype='object')

In [175]:
answers_without_accepted_df.columns

Index(['pt_post_id', 'pt_post_type_id', 'pt_accepted_answer_id',
       'pt_creation_date', 'pt_score', 'pt_title', 'pt_body', 'pt_tags',
       'pt_parent_id', 'pt_answer'],
      dtype='object')

In [176]:
answers_without_accepted_df[["pt_post_id", "pt_title", "pt_body", "pt_answer", "pt_score"]]

Unnamed: 0,pt_post_id,pt_title,pt_body,pt_answer,pt_score
0,41818618,PyTorch doesn't import after installing Anaconda,<p>I just installed PyTorch after installing A...,"<p>While the framework name is Pytorch, it mus...",0
1,42636323,Explanation behind actor-critic algorithm in p...,<p>Pytorch provides a good example of using ac...,<ul>\n<li><p>First the rewards a collected for...,0
2,42795226,Pytorch and Polynomial Linear Regression issue,<p>I have modified the code hat I found on the...,<p>There are a couple of things that contribut...,1
3,43104252,undefined symbol: PySlice_AdjustIndices when i...,"<p>I am trying to use PyTorch, and I think the...","<p>I have the same problem, maybe the build is...",5
4,43367075,Pytorch .backward() method without CUDA,<p>I am trying to run the code in the <a href=...,"<p>You should transfer you network, inputs, an...",2
...,...,...,...,...,...
3357,74612146,Is it possible to perform quantization on dens...,<p>I have been trying to performing quantizati...,<p>Here's how to do this on DenseNet169 from t...,0
3358,74637151,"Why when the batch size increased, the epoch t...",<p>Epoch time means the time required to train...,"<p>As you already noticed, there are many fact...",1
3359,74642594,Why does StableDiffusionPipeline return black ...,"<p>I am using the <a href=""https://github.com/...",<p>Apparently it is indeed an Apple Silicon (M...,0
3360,74671399,Locating tags in a string in PHP (with respect...,<p>I want to create a function that labels the...,<p>I think I've got something. How about this:...,1


In [182]:
answers_without_accepted_df.shape

(3362, 10)

In [183]:
answers_without_accepted_df.pt_post_id.nunique()

3205

In [187]:
answers_without_accepted_df = answers_without_accepted_df.drop_duplicates("pt_post_id", keep="first")

In [189]:
answers_without_accepted_df.shape

(3205, 10)

In [196]:
answers_without_accepted_df[["pt_post_id", "pt_title", "pt_body", "pt_answer"]]

Unnamed: 0,pt_post_id,pt_title,pt_body,pt_answer
0,41818618,PyTorch doesn't import after installing Anaconda,<p>I just installed PyTorch after installing A...,"<p>While the framework name is Pytorch, it mus..."
1,42636323,Explanation behind actor-critic algorithm in p...,<p>Pytorch provides a good example of using ac...,<ul>\n<li><p>First the rewards a collected for...
2,42795226,Pytorch and Polynomial Linear Regression issue,<p>I have modified the code hat I found on the...,<p>There are a couple of things that contribut...
3,43104252,undefined symbol: PySlice_AdjustIndices when i...,"<p>I am trying to use PyTorch, and I think the...","<p>I have the same problem, maybe the build is..."
4,43367075,Pytorch .backward() method without CUDA,<p>I am trying to run the code in the <a href=...,"<p>You should transfer you network, inputs, an..."
...,...,...,...,...
3357,74612146,Is it possible to perform quantization on dens...,<p>I have been trying to performing quantizati...,<p>Here's how to do this on DenseNet169 from t...
3358,74637151,"Why when the batch size increased, the epoch t...",<p>Epoch time means the time required to train...,"<p>As you already noticed, there are many fact..."
3359,74642594,Why does StableDiffusionPipeline return black ...,"<p>I am using the <a href=""https://github.com/...",<p>Apparently it is indeed an Apple Silicon (M...
3360,74671399,Locating tags in a string in PHP (with respect...,<p>I want to create a function that labels the...,<p>I think I've got something. How about this:...


In [198]:
answers_without_accepted_df.columns

Index(['pt_post_id', 'pt_post_type_id', 'pt_accepted_answer_id',
       'pt_creation_date', 'pt_score', 'pt_title', 'pt_body', 'pt_tags',
       'pt_parent_id', 'pt_answer'],
      dtype='object')

In [211]:
relevant_answers_df = answers_df[answers_df.pt_parent_id.isin(answers_without_accepted_df.pt_post_id)]

In [212]:
def concat_answers_for_not_accepted(row):
    post_id = row["pt_post_id"]
    
    answers = relevant_answers_df[relevant_answers_df["pt_parent_id"] == post_id]
    
    answers_body = answers["pt_body"]
    
    concat_answer = ""
    for text in answers_body:
        concat_answer += text
        
    return concat_answer
        

In [213]:
answers_without_accepted_df["context"] = answers_without_accepted_df.progress_apply(lambda x: concat_answers_for_not_accepted(x), axis=1)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3205/3205 [00:03<00:00, 1039.79it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  answers_without_accepted_df["context"] = answers_without_accepted_df.progress_apply(lambda x: concat_answers_for_not_accepted(x), axis=1)


In [215]:
answers_without_accepted_df[["pt_post_id", "pt_title", "pt_body", "context", "pt_answer"]]

Unnamed: 0,pt_post_id,pt_title,pt_body,context,pt_answer
0,41818618,PyTorch doesn't import after installing Anaconda,<p>I just installed PyTorch after installing A...,<p>The Py Torch package might have installed t...,"<p>While the framework name is Pytorch, it mus..."
1,42636323,Explanation behind actor-critic algorithm in p...,<p>Pytorch provides a good example of using ac...,<ul>\n<li><p>First the rewards a collected for...,<ul>\n<li><p>First the rewards a collected for...
2,42795226,Pytorch and Polynomial Linear Regression issue,<p>I have modified the code hat I found on the...,<p>There are a couple of things that contribut...,<p>There are a couple of things that contribut...
3,43104252,undefined symbol: PySlice_AdjustIndices when i...,"<p>I am trying to use PyTorch, and I think the...","<p>I have the same problem, maybe the build is...","<p>I have the same problem, maybe the build is..."
4,43367075,Pytorch .backward() method without CUDA,<p>I am trying to run the code in the <a href=...,"<p>You should transfer you network, inputs, an...","<p>You should transfer you network, inputs, an..."
...,...,...,...,...,...
3357,74612146,Is it possible to perform quantization on dens...,<p>I have been trying to performing quantizati...,<p>Here's how to do this on DenseNet169 from t...,<p>Here's how to do this on DenseNet169 from t...
3358,74637151,"Why when the batch size increased, the epoch t...",<p>Epoch time means the time required to train...,"<p>As you already noticed, there are many fact...","<p>As you already noticed, there are many fact..."
3359,74642594,Why does StableDiffusionPipeline return black ...,"<p>I am using the <a href=""https://github.com/...",<p>Apparently it is indeed an Apple Silicon (M...,<p>Apparently it is indeed an Apple Silicon (M...
3360,74671399,Locating tags in a string in PHP (with respect...,<p>I want to create a function that labels the...,<p>I think I've got something. How about this:...,<p>I think I've got something. How about this:...


In [221]:
answers_without_accepted_df.to_csv("questions_without_accepted_df_updated.csv", index=False)

In [222]:
questions_with_accepted_df.columns

Index(['pt_post_id', 'pt_post_type_id', 'pt_accepted_answer_id',
       'pt_creation_date', 'pt_score', 'pt_title', 'pt_body', 'pt_tags',
       'pt_parent_id', 'context', 'pt_answer', 'question'],
      dtype='object')

In [223]:
answers_without_accepted_df.columns

Index(['pt_post_id', 'pt_post_type_id', 'pt_accepted_answer_id',
       'pt_creation_date', 'pt_score', 'pt_title', 'pt_body', 'pt_tags',
       'pt_parent_id', 'pt_answer', 'context'],
      dtype='object')

In [224]:
pt_question_answers_df = pd.concat([questions_with_accepted_df, answers_without_accepted_df], axis=0)

In [225]:
pt_question_answers_df.to_csv("pt_question_answers.csv", index=False)