In [1]:
import pickle

# Load the pickle file
file_path = 'adoption_database.pkl'

# Reading the contents of the file
with open(file_path, 'rb') as file:
    data = pickle.load(file)

# Displaying the type and a preview of the contents
data_type = type(data)
data_preview = repr(data)[:500]  # Show only the first 500 characters to get an idea of the data

# print(data["posts"][:5])

print(f"Number of entries: {len(data)}")

# print("Basic information about the data:")
# print(data.info())

print("\nFirst few rows of the data:")
print(data.head())

# print("\nData types of each column:")
# print(data.dtypes)

# Assuming each post is a list of words
total_words = data['posts'].apply(lambda x: len(x) if isinstance(x, list) else 0).sum()

print(f"Total number of words: {total_words}")

# Counting the number of unique users
num_users = data['user_id'].nunique()

print(f"Number of unique users: {num_users}")


Number of entries: 25945

First few rows of the data:
          user_id      display_name total_posts  \
0        ydkjenn1          ydkjenn1           2   
1  eventuallyfarm  Joanne Alexander           2   
2     vernellinnj       vernellinnj         788   
3   adoptionadmin     adoptionadmin         240   
4      smiles2012        smiles2012         565   

                                               posts  
0  [\nHi. I'm 46 and single. I've adopted two chi...  
1  [\nThey probably just want to make sure you've...  
2  [\nOut of the blue yesterday my baby boy start...  
3  [\nI remember there being a great children's b...  
4  [\nI am in the same situation you are. Mr. Rig...  
Total number of words: 612372
Number of unique users: 25945


In [2]:
data['posts'] = data['posts'].apply(lambda x: ' '.join(x) if isinstance(x, list) else x)

user_posts = data.groupby('user_id')['posts'].agg(' '.join)

total_length = sum([len(text) for text in user_posts])
print("Total length: ", total_length)

average_length = total_length / len(user_posts)

print("Average length: ", average_length)

Total length:  389530016
Average length:  15013.683407207554


In [13]:
import json

file_name = "progress_and_responses_gpt_4_final.json"

def save_progress_and_responses(last_processed_index, responses):
    with open(file_name, 'w') as file:
        data = {
            'last_processed_index': last_processed_index,
            'responses': responses
        }
        json.dump(data, file)


def load_progress_and_responses():
    try:
        with open(file_name, 'r') as file:
            data = json.load(file)
            return data['last_processed_index'], data['responses']
    except FileNotFoundError:
        return -1, []  # No progress file, start from the beginning

    
import openai
  
api_key = ""
client = openai.OpenAI(api_key=api_key)


# Load the last processed index and existing responses
last_processed_index, responses = load_progress_and_responses()

# Loop through each user post starting from the last processed index
for i in range(last_processed_index + 1, len(user_posts)):
    post = user_posts[i]

    print(f"Processing post: {i + 1}/{len(user_posts)}")
    
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {
                "role": "system",
                "content": "You are an Adoption Agent who specializes in reviewing and understanding online posts relating to adoptions. Given the post, only respond yes or no to each of the first three questions:  1) is the author a birth mother, 2) has the birth mother actually given up a child for adoption, 3) did the birth mother give a reason why she chose the adoption family? Finally, if the answer to the first three questions were all yes, then respond concisely to 4) what are the main and specific reasons the adoption family was chosen, if any, choose all that apply: Salary, Age, Occupation, Race, Religion, Order of Presentation, Timing, Siblings, etc, or N/A. Example response: 1) yes 2) yes 3) yes 4) Age: older than self"
            },
            {
                "role": "user",
                "content": post[:50000]
            }
        ]
    )
    response_text = response.choices[0].message.content
    responses.append(response_text)

    # Save progress and responses every 10 posts
    if (i + 1) % 10 == 0:
        save_progress_and_responses(i, responses)


Processing post: 23441/25945
Processing post: 23442/25945
Processing post: 23443/25945
Processing post: 23444/25945
Processing post: 23445/25945
Processing post: 23446/25945
Processing post: 23447/25945
Processing post: 23448/25945
Processing post: 23449/25945
Processing post: 23450/25945
Processing post: 23451/25945
Processing post: 23452/25945
Processing post: 23453/25945
Processing post: 23454/25945
Processing post: 23455/25945
Processing post: 23456/25945
Processing post: 23457/25945
Processing post: 23458/25945
Processing post: 23459/25945
Processing post: 23460/25945
Processing post: 23461/25945
Processing post: 23462/25945
Processing post: 23463/25945
Processing post: 23464/25945
Processing post: 23465/25945
Processing post: 23466/25945
Processing post: 23467/25945
Processing post: 23468/25945
Processing post: 23469/25945
Processing post: 23470/25945
Processing post: 23471/25945
Processing post: 23472/25945
Processing post: 23473/25945
Processing post: 23474/25945
Processing pos

Processing post: 23724/25945
Processing post: 23725/25945
Processing post: 23726/25945
Processing post: 23727/25945
Processing post: 23728/25945
Processing post: 23729/25945
Processing post: 23730/25945
Processing post: 23731/25945
Processing post: 23732/25945
Processing post: 23733/25945
Processing post: 23734/25945
Processing post: 23735/25945
Processing post: 23736/25945
Processing post: 23737/25945
Processing post: 23738/25945
Processing post: 23739/25945
Processing post: 23740/25945
Processing post: 23741/25945
Processing post: 23742/25945
Processing post: 23743/25945
Processing post: 23744/25945
Processing post: 23745/25945
Processing post: 23746/25945
Processing post: 23747/25945
Processing post: 23748/25945
Processing post: 23749/25945
Processing post: 23750/25945
Processing post: 23751/25945
Processing post: 23752/25945
Processing post: 23753/25945
Processing post: 23754/25945
Processing post: 23755/25945
Processing post: 23756/25945
Processing post: 23757/25945
Processing pos

Processing post: 24007/25945
Processing post: 24008/25945
Processing post: 24009/25945
Processing post: 24010/25945
Processing post: 24011/25945
Processing post: 24012/25945
Processing post: 24013/25945
Processing post: 24014/25945
Processing post: 24015/25945
Processing post: 24016/25945
Processing post: 24017/25945
Processing post: 24018/25945
Processing post: 24019/25945
Processing post: 24020/25945
Processing post: 24021/25945
Processing post: 24022/25945
Processing post: 24023/25945
Processing post: 24024/25945
Processing post: 24025/25945
Processing post: 24026/25945
Processing post: 24027/25945
Processing post: 24028/25945
Processing post: 24029/25945
Processing post: 24030/25945
Processing post: 24031/25945
Processing post: 24032/25945
Processing post: 24033/25945
Processing post: 24034/25945
Processing post: 24035/25945
Processing post: 24036/25945
Processing post: 24037/25945
Processing post: 24038/25945
Processing post: 24039/25945
Processing post: 24040/25945
Processing pos

Processing post: 24290/25945
Processing post: 24291/25945
Processing post: 24292/25945
Processing post: 24293/25945
Processing post: 24294/25945
Processing post: 24295/25945
Processing post: 24296/25945
Processing post: 24297/25945
Processing post: 24298/25945
Processing post: 24299/25945
Processing post: 24300/25945
Processing post: 24301/25945
Processing post: 24302/25945
Processing post: 24303/25945
Processing post: 24304/25945
Processing post: 24305/25945
Processing post: 24306/25945
Processing post: 24307/25945
Processing post: 24308/25945
Processing post: 24309/25945
Processing post: 24310/25945
Processing post: 24311/25945
Processing post: 24312/25945
Processing post: 24313/25945
Processing post: 24314/25945
Processing post: 24315/25945
Processing post: 24316/25945
Processing post: 24317/25945
Processing post: 24318/25945
Processing post: 24319/25945
Processing post: 24320/25945
Processing post: 24321/25945
Processing post: 24322/25945
Processing post: 24323/25945
Processing pos

Processing post: 24573/25945
Processing post: 24574/25945
Processing post: 24575/25945
Processing post: 24576/25945
Processing post: 24577/25945
Processing post: 24578/25945
Processing post: 24579/25945
Processing post: 24580/25945
Processing post: 24581/25945
Processing post: 24582/25945
Processing post: 24583/25945
Processing post: 24584/25945
Processing post: 24585/25945
Processing post: 24586/25945
Processing post: 24587/25945
Processing post: 24588/25945
Processing post: 24589/25945
Processing post: 24590/25945
Processing post: 24591/25945
Processing post: 24592/25945
Processing post: 24593/25945
Processing post: 24594/25945
Processing post: 24595/25945
Processing post: 24596/25945
Processing post: 24597/25945
Processing post: 24598/25945
Processing post: 24599/25945
Processing post: 24600/25945
Processing post: 24601/25945
Processing post: 24602/25945
Processing post: 24603/25945
Processing post: 24604/25945
Processing post: 24605/25945
Processing post: 24606/25945
Processing pos

Processing post: 24856/25945
Processing post: 24857/25945
Processing post: 24858/25945
Processing post: 24859/25945
Processing post: 24860/25945
Processing post: 24861/25945
Processing post: 24862/25945
Processing post: 24863/25945
Processing post: 24864/25945
Processing post: 24865/25945
Processing post: 24866/25945
Processing post: 24867/25945
Processing post: 24868/25945
Processing post: 24869/25945
Processing post: 24870/25945
Processing post: 24871/25945
Processing post: 24872/25945
Processing post: 24873/25945
Processing post: 24874/25945
Processing post: 24875/25945
Processing post: 24876/25945
Processing post: 24877/25945
Processing post: 24878/25945
Processing post: 24879/25945
Processing post: 24880/25945
Processing post: 24881/25945
Processing post: 24882/25945
Processing post: 24883/25945
Processing post: 24884/25945
Processing post: 24885/25945
Processing post: 24886/25945
Processing post: 24887/25945
Processing post: 24888/25945
Processing post: 24889/25945
Processing pos

Processing post: 25139/25945
Processing post: 25140/25945
Processing post: 25141/25945
Processing post: 25142/25945
Processing post: 25143/25945
Processing post: 25144/25945
Processing post: 25145/25945
Processing post: 25146/25945
Processing post: 25147/25945
Processing post: 25148/25945
Processing post: 25149/25945
Processing post: 25150/25945
Processing post: 25151/25945
Processing post: 25152/25945
Processing post: 25153/25945
Processing post: 25154/25945
Processing post: 25155/25945
Processing post: 25156/25945
Processing post: 25157/25945
Processing post: 25158/25945
Processing post: 25159/25945
Processing post: 25160/25945
Processing post: 25161/25945
Processing post: 25162/25945
Processing post: 25163/25945
Processing post: 25164/25945
Processing post: 25165/25945
Processing post: 25166/25945
Processing post: 25167/25945
Processing post: 25168/25945
Processing post: 25169/25945
Processing post: 25170/25945
Processing post: 25171/25945
Processing post: 25172/25945
Processing pos

Processing post: 25422/25945
Processing post: 25423/25945
Processing post: 25424/25945
Processing post: 25425/25945
Processing post: 25426/25945
Processing post: 25427/25945
Processing post: 25428/25945
Processing post: 25429/25945
Processing post: 25430/25945
Processing post: 25431/25945
Processing post: 25432/25945
Processing post: 25433/25945
Processing post: 25434/25945
Processing post: 25435/25945
Processing post: 25436/25945
Processing post: 25437/25945
Processing post: 25438/25945
Processing post: 25439/25945
Processing post: 25440/25945
Processing post: 25441/25945
Processing post: 25442/25945
Processing post: 25443/25945
Processing post: 25444/25945
Processing post: 25445/25945
Processing post: 25446/25945
Processing post: 25447/25945
Processing post: 25448/25945
Processing post: 25449/25945
Processing post: 25450/25945
Processing post: 25451/25945
Processing post: 25452/25945
Processing post: 25453/25945
Processing post: 25454/25945
Processing post: 25455/25945
Processing pos

Processing post: 25705/25945
Processing post: 25706/25945
Processing post: 25707/25945
Processing post: 25708/25945
Processing post: 25709/25945
Processing post: 25710/25945
Processing post: 25711/25945
Processing post: 25712/25945
Processing post: 25713/25945
Processing post: 25714/25945
Processing post: 25715/25945
Processing post: 25716/25945
Processing post: 25717/25945
Processing post: 25718/25945
Processing post: 25719/25945
Processing post: 25720/25945
Processing post: 25721/25945
Processing post: 25722/25945
Processing post: 25723/25945
Processing post: 25724/25945
Processing post: 25725/25945
Processing post: 25726/25945
Processing post: 25727/25945
Processing post: 25728/25945
Processing post: 25729/25945
Processing post: 25730/25945
Processing post: 25731/25945
Processing post: 25732/25945
Processing post: 25733/25945
Processing post: 25734/25945
Processing post: 25735/25945
Processing post: 25736/25945
Processing post: 25737/25945
Processing post: 25738/25945
Processing pos

In [14]:
import re
count_yes = {'1': 0, '2': 0, '3': 0}
responses_part_4 = {}
last_processed_index, responses = load_progress_and_responses()

count = 0
for response, post in zip(responses, user_posts):
    count += 1
    parts = response.split('\n')
     # Check if the response follows the structured format
    if re.match(r'^\d\)', response):
        parts = re.split(r'(?=\d\))', response)
        for part in parts:
            part = part.strip()
            if part.startswith('1)'):
                if 'Yes' in part:
                    count_yes['1'] += 1
            elif part.startswith('2)'):
                if 'Yes' in part:
                    count_yes['2'] += 1
            elif part.startswith('3)'):
                if 'Yes' in part:
                    count_yes['3'] += 1
                    print(post)
                    print(response)
            elif part.startswith('4)'):
                answer = part.partition('4) ')[2].strip()
                responses_part_4[answer] = responses_part_4.get(answer, 0) + 1
                
            else:
                print(part)
    else:
        # Handle less structured format
        parts = re.split(r',|\n', response)
        for i, part in enumerate(parts, start=1):
            part = part.strip()
            if i <= 3:  # For parts 1, 2, 3
                if 'Yes' in part:
                    count_yes[str(i)] += 1
            elif i == 4:  # For part 4
                responses_part_4[part] = responses_part_4.get(part, 0) + 1
            else:
                print(part)

                
print(count)
print("Count of 'Yes' responses for each part:")
for part, count in count_yes.items():
    print(f"Part {part}: {count}")

print("\nFrequency of responses for part 4:")
for response, count in responses_part_4.items():
    print(f"'{response}': {count}")






















































































































































































My best friend and her husband of 15 years have been hoping to be parents for a long time. They have had two miscarriges (one last year at this time that was devastating) Mary is 39 and Brian is 38. She does not feel she can go through another pregnancy loss. She has had it on her heart for awhile to adopt, her husband had not gotten there yet. Yesterday at my church, there was information about a young teen looking for a Christian couple to adopt her baby due in December. It was like a bolt from above I knew I had to tell my friend of this situation. I talked to my husband about this (he freaked at first thinking I wanted to adopt a fouth baby) after I calmed him down and explained, he agreed that they needed to know. When I called her, she then talked to her husband and they called back in ten minutes to sa































































































































































































































I agree... I know how wonderful it is to get "The Call" and for everything to seem perfect. But, I also know the pain of having to turn a month and half plan down because the bm's ideas kept getting more and more one sided. Please pray about this situation and trust your gut instinct. You've waited a long time to be a mother and the child of your dreams will come to you if you put yourselves in God's hands. Good Luck with your decision.  
Hi, I know what you mean by wanting to know some of the "fun" things about the birthparents. I too wanted to know all of the specifics, the real obvious things and after my nerves clamed down I was able to ask our sons birthmother all sorts of fun things. For example: Favorite color, favorite subject in school, favorite type of music






















































































































































































































































































































































































I have never thought much about it until we found out this week our DD who is 7 weeks old is not "1/2" German, but is "1/2" Armenian. (We are currently working with a 2nd presumed BFather.) When her first "BFather" was 100% German she was not anything other than CC. ("Low German" we hear it's called when they have dark hair, eyes, etc.) But now the hopefully "real" BFather is 100% Armenian and the comments we get is that she is "bi-racial." Is there a term for children that are CC and of middle eastern decent?  
MAM takes after bdad's side of the family. His mom sent us some pictures of herself when she was a preschoole






















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































acleeWell if the trend toward racial ambigous people continues I guess we can count on Matty having potential as a news anchor. Which might just be perfect since the kid is never quiet and he seems to have an opinion on just about everything. I totally see DD doing this in the future. Sometimes with her play acting she'll gather some papers and sit in a chair and start off with her tag line...Welcome to the Local 8 News. It's so cute to see her do this. Katie Couric has nothing on this girl. ;)  
I had no idea Slash was bi-racial either especially under that massive amount of hair. To be honest I don't know if I've ever really ha


































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































0) I knew they were 'the ones'




























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































Would you all then agree that Scripture is most often interpreted based on any one individual's experience? No matter the size of a congregation, it would seem to me that there are exactly that same number of interpretations of sermons and so forth. And, our understandings and feelings regarding such are so very dependant on our own lives and past. I can read the Bible over and over amd sometimes I am simply struck by something I swear I have never read before. It suddenly makes sense to me or the lesson "meant" by whhat i am reading changes. And though I honestly believe that there are just as many interpretations of the Bible as there are people to read it, I do believe t

















































































































































































































































































































































































































































































































































































































































































































I'm filling out kindergarten papers for my soon to be 5yr old. Ahh how did he get so big?? Anyway, it has 2 spots that I'm kinda at a loss: Ethnicity- Hispanic? Race - I just wrote no on the Ethnicity -Hispanic? spot but am unsure of what the "correct" term is to put under race? Do I write AA or black? I know t









































































































































































































































































































































































































































Hi, Just a quick intro then a question. We are in Utah and we are matched with a baby boy due Jan 15. I know we have to wait until we finalize to have him sealed, but can we do the baby blessing before that? I keep forgetting to ask my bishop, so if anyone knows that would be great. Thanks Rachel  
I really don't know much about most of your post, but I would love to be apart of number 5, I don't know if I personaly would want to be hosted but I would definatly host. Also as far as OA maybe coming up with some questions for the pap, and the emom's to ask each other, and





















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































In [15]:
import pandas as pd

def clean_text(text):
    if isinstance(text, str):
        return text.encode('utf-8', 'replace').decode('utf-8')
    return text

# Assuming 'responses' and 'user_posts' are your lists
responses_clean = [clean_text(item) for item in responses]
user_posts_clean = [clean_text(item) for item in user_posts]

# Pairing lists and creating a DataFrame
df = pd.DataFrame(list(zip(responses_clean, user_posts_clean)), columns=['response', 'posts'])

# Writing DataFrame to an Excel file
try:
    df.to_excel('output_data1_final.xlsx', index=False)
except Exception as e:
    print("Error while writing to Excel:", e)


In [21]:
import pandas as pd

# Load the Excel file into a pandas DataFrame
df = pd.read_excel('output_data1_final.xlsx', header=None)


# Define a function to transform the column entries
def transform_column(text):
    text = str(text)
    parts = text.split(',')
    # For each part, split on the colon and take the first element (0th index)
    transformed_parts = [part.split(':')[0] for part in parts]
    # Join the transformed parts with a comma
    transformed_text = ', '.join(transformed_parts)    
    return transformed_text

# Define a function to split the first column into four new columns based on a specific pattern, handling newlines and unexpected patterns
def split_first_column(text):
    # Initialize an empty list to hold the split results and a variable to track the current index
    split_results = ['', '', '', '']
    current_index = None  # Initialize current_index as None to indicate no current section has been identified
    
    # Normalize newline characters and then split based on the pattern "n)"
    parts = str(text).replace('\n', ' ').split(' ')
    temp_part = ''  # Initialize a temporary part accumulator
    
    for part in parts:
        if part.startswith('1)'):
            if temp_part and current_index is not None:  # If there's accumulated text, save it
                split_results[current_index] = temp_part
            temp_part = part[2:]  # Start new text accumulation
            current_index = 0
        elif part.startswith('2)'):
            if temp_part and current_index is not None:  # Save previous accumulated text
                split_results[current_index] = temp_part
            temp_part = part[2:]  # Start new text accumulation
            current_index = 1
        elif part.startswith('3)'):
            if temp_part and current_index is not None:  # Save previous accumulated text
                split_results[current_index] = temp_part
            temp_part = part[2:]  # Start new text accumulation
            current_index = 2
        elif part.startswith('4)'):
            if temp_part and current_index is not None:  # Save previous accumulated text
                split_results[current_index] = temp_part
            temp_part = part[2:]  # Start new text accumulation
            current_index = 3
        else:
            if current_index is not None:
                temp_part += ' ' + part  # Accumulate text only if we've started capturing

    # Save the last accumulated part
    if temp_part and current_index is not None:
        split_results[current_index] = temp_part

    return split_results

# Apply the function to the first column and create four new columns
df[['Split1', 'Split2', 'Split3', 'Split4']] = pd.DataFrame(df.iloc[:, 0].apply(split_first_column).tolist(), index=df.index)

# Drop the original first column
df.drop(df.columns[0], axis=1, inplace=True)

df.insert(4, 'Duplicated', df.iloc[:, 4])

# Apply the transformation to the duplicated column
df['Duplicated'] = df['Duplicated'].apply(transform_column)

# Add an index column that represents the row number
df.reset_index(drop=False, inplace=True)

# Rename the index column to "Row_Number"
df.rename(columns={'index': 'Row_Number'}, inplace=True)


# Save the updated DataFrame back to an Excel file
df.to_excel('data1_final.xlsx', index=False, header=False)


In [3]:
import pandas as pd
import re

# Load the Excel file into a pandas DataFrame
df = pd.read_excel('data1_final.xlsx', header=None)


def transform_column(text):
    if not isinstance(text, str):
        # Handle the non-string input here. For example, return an empty string or convert to string
        text = str(text)  # Convert to string, or you can choose to return '' if you want to ignore non-strings
    
    # Remove all characters inside parentheses along with the parentheses themselves
    text_no_parentheses = re.sub(r'\s*\([^)]*\)', '', text)
    # Split the text by commas and semicolons to handle different delimiters
    parts = re.split(r',\s*|;\s*', text_no_parentheses)
    transformed_parts = []
    for part in parts:
        # Extract the key before the colon if it exists, otherwise take the whole part
        key = part.split(':')[0] if ':' in part else part
        # Remove any trailing punctuation from the key
        key_clean = re.sub(r'\W+$', '', key)
        transformed_parts.append(key_clean.strip())
    # Join the transformed parts with a comma and space
    transformed_text = ', '.join(transformed_parts)
    return transformed_text




# Apply the transformation to the duplicated column
df[5] = df[6].apply(transform_column)



# Save the updated DataFrame back to an Excel file
df.to_excel('data1_final_reformatted.xlsx', index=False, header=False)
