This notebook generates the datasets for the data privacy locked room. The general approach is to build medium-sized datasets by randomly choosing values for each feature from predefined lists, and then to replace one or more of the entries with the clue that is to be hidden in the dataset. The data can then be exported to CSV and loaded into the activity notebook.

In [None]:
import pandas as pd
import numpy as np
import random

## First Question

In [None]:
# Breeds adapted from https://www.wewagtoronto.ca/post/a-look-at-10-most-popular-dogs-in-canada
breeds = ['Labrador Retriever',
          'German Shepard',
          'Bernese Mountain Dog',
          'Poodle',
          'Corgi',
          'Pit Bull',
          'Chihuahua',
          'Dalmatian',
          'Shetland Sheepdog',
          'French Bulldog'
         ]
# Names taken from https://www.akc.org/dog-name-finder/unisex/
names = [
'Addie',
'Addison',
'Adler',
'Admiral',
'Ajax',
'Amy Rose',
'Aubrey',
'Avery',
'Baby',
'Bailey',
'Barclay',
'Bashful',
'Baxter',
'Bayleaf',
'Beamer',
'Bean',
'Beans',
'Bentley',
'Berkeley',
'Beta',
'Bing',
'Birdie',
'Biscuit',
'Bit',
'Bitsy',
'Bitty',
'Bizzy',
'Blackie',
'Blake',
'Blaze',
'Blondie',
'Blue',
'Blues',
'Bobo',
'Body',
'Bogey',
'Bohemian',
'Bongo',
'Bonkers',
'Boo',
'Breeze',
'Brooklyn',
'Brownie',
'Buffy',
'Bug',
'Bullet',
'Bungee',
'Bunker',
'Cabernet',
'Cairo',
'Cali',
'Captain',
'Carmen Sandiego',
'Casey',
'Castle',
'Channing',
'Chaos',
'Charlie',
'Chew Chew',
'Chewy',
'China',
'Chocolate',
'Church',
'Cleveland',
'Commander Shepard',
'Cookie',
'Cromwell',
'Crumble',
'Crush',
'Cuddles',
'Curly',
'Cyber',
'Cypher',
'Dab',
'Dakota',
'Dallas',
'Dash',
'Data',
'Digger',
'Dracula',
'Drop',
'Duffy',
'Echo',
'Elf',
'Falco Lombardi',
'Feisty',
'Finn',
'Fiver',
'Flash',
'Fluffy',
'Flynn',
'Frankie',
'Frisky',
'Gidget',
'Giga',
'Gizmo',
'Goldie',
'Goody',
'Google',
'Gucci',
'Gumball',
'Gypsy',
'Happy',
'Harley',
'Harper',
'Hint',
'Hobbit',
'Hug',
'Hurricane',
'Inca',
'India',
'Itty',
'Itty Bitty',
'Izzy',
'Jabba',
'Jambalaya',
'Jango',
'Java',
'Jelly',
'Jellybean',
'Jersey',
'Jot',
'Kiss',
'Lara Kroft',
'Laser',
'Later',
'Latte',
'Leifang',
'Lemon',
'Licorice',
'Limerick',
'Linux',
'Linx',
'Lucky',
'Magic',
'Marley',
'Mellow',
'Midnight',
'Mocha',
'Mochi',
'Moondoggie',
'Morgan',
'Mouse',
'Mr. Rogers',
'Muffin',
'Muppet',
'Nelly',
'Newbie',
'Nibbles',
'Olive',
'Ono',
'Onyx',
'Opal',
'Oracle',
'Oregon',
'Oreo',
'Ounce',
'Papillon',
'Peanut',
'Peyton',
'Pipsqueak',
'Pixel',
'Pooch',
'Pookie',
'Popcorn',
'Preston',
'Princess Peach',
'Puddles',
'Pumpkin',
'Quinn',
'Quyen',
'Red',
'Regal',
'Riley',
'Rolls',
'Rory',
'Royce',
'Saber',
'Sam',
'Sammy',
'Sandy',
'Shadow',
'Shell',
'Silver',
'Skitty',
'Sky',
'Sleepy',
'Smidge',
'Snip',
'Sugar',
'Symphony',
'Tamago',
'Tease',
'Tickles',
'Token',
'Tori',
'Tucker',
'Uber',
'Velocity',
'Wags',
'Whiskey',
'Wicket',
'Widget',
'Wiggles',
'Willow',
'Yappy',
'Zip'
]

# Owners generated from https://www.behindthename.com/random/
owners = [
'Mike',
'Flora',
'Eugenia',
'Da',
'Lionel',
'Ivona',
'Kellen',
'Dina',
'Allen',
'Filip',
'Cortney',
'Frida',
'Célestine',
'Narcisse',
'Shimon',
'Joanne',
'Zhou',
'Dione',
'Randall',
'Kaila',
'Eloy',
'Khloe',
'Genrikh',
'Faris',
'Anastasio',
'Ester',
'Ellen',
'Rachel',
'Dafne',
'Gloriana',
'Theodore',
'Su-Jin',
'Petros',
'Naldo',
'Olimpiada',
'Annemarie',
'Ľuboš',
'Giustina',
'Cecilia',
'Azra',
'Ionas',
'Vivyan',
'Lesly',
'Rae',
'Aneta',
'Berniece',
'Aubert',
'Elisabeti',
'Magdalena',
'Kurt',
'Joi',
'Benedict',
'Jean-Marie',
'Eleonora',
'Dora',
'Raphael',
'Osborn',
'Grethe',
'Dane',
'Jeong',
"D'Artagnan",
'Matilda',
'Eldon',
'Rupert',
'Cierra'
]

# Phone number generator from https://stackoverflow.com/questions/26226801/making-random-phone-number-xxx-xxx-xxxx
def gen_phone():
    """Return a random phone-number string formatted as ``XXX-XXX-XXXX``.

    The first group is drawn from 100-999, the middle group from 1-888
    (zero-padded to three digits) and the last group from 1-9998
    (zero-padded to four digits). A last group made of one repeated digit,
    '1111' through '8888', is rejected and drawn again.
    """
    rejected_last_groups = ('1111', '2222', '3333', '4444',
                            '5555', '6666', '7777', '8888')

    # Draw the groups in a fixed order so seeded runs stay reproducible.
    first_group = random.randint(100, 999)
    middle_group = random.randint(1, 888)
    while True:
        last_group = f'{random.randint(1, 9998):04d}'
        if last_group not in rejected_last_groups:
            break

    return f'{first_group}-{middle_group:03d}-{last_group}'

In [None]:
# Seed the RNG so the generated contact list is identical on every run
random.seed(42)

# Draw 100 values per feature (with replacement). The dict entries are
# evaluated top to bottom, so the RNG is consumed in column order.
petDf = pd.DataFrame({
    'Pet Name': random.choices(names, k=100),
    'Breed': random.choices(breeds, k=100),
    'Owner': random.choices(owners, k=100),
    'Owner Phone Number': [gen_phone() for _ in range(100)],
})

# Overwrite the row at position 22 with the clue record hidden in this dataset
petDf.iloc[22] = ['Charlie', 'Golden Retriever', 'Jason', '834-352-0013']
petDf

In [None]:
# Verify the clue record is in the dataframe: display the row at position 22,
# which is the position the clue entry was written to when petDf was built.
petDf.iloc[22]

In [None]:
# Export the pet contact list to CSV; index=False keeps the row index out of
# the file so only the four data columns are written.
petDf.to_csv('data/PetGroupContactList.csv', index=False)

## Second Question

In [None]:
# Search data from:
# https://perchance.org/googlesearchhistory
# https://keywordtool.io/

searches = [ 
'hhhnlqhgfie',
"What's Jake Paul's first name?",
'Nearest hospital.',
'Roblox is good',
'Gucci rocks',
'890÷63=?',
"who's Logan Paul's sister",
'youtube videos.',
'Map of mexico',
"World's strongest man",
'How do I be tall?',
'How many miles away is canada.',
'minecraft.',
'how do I tie my shoes in less than 5 seconds',
'words that end with b',
'Cat videos',
'when is the next February 29?',
'955+31=?',
'Nearest school',
'dog videos.',
'Roblox memes.',
'2019 Cars.',
'xyxmuzictzc',
'Map of UK',
'cat videos that are funny',
'436×102',
'words that start with h.',
"Who's Jake Paul's step son",
'fortnite accounts that got banned.',
"World's tallest dog.",
'Words that end with q.',
'fortnite videos.',
'how to be rich',
'399+713=?',
'how many steps does it take to get to UK.',
'bldlhzjufhn',
'gnktvnhltfy',
'When is christmas',
'Where am I',
'how many kilometers away is UK?',
'how to get a life',
"what's Logan Paul's first name?",
'When is new years eve?',
'Gucci rocks.',
'How do I be rich in less than 2 days',
'words that end with m.',
'Snake videos that are funny',
"what's Michael Jackson's first name",
'Minecraft memes!',
"World's strongest skyscraper.",
"who's Elvis Presley's step son",
"what's Jake Paul's blood type",
'What Is A Bird Exactly?',
'That Movie With The Guy',
'For DND Purposes Only',
'How To Be Clean',
'How To Hard Boil Eggs',
'Get Alton Brown On The Phone',
"what's Obama's first name",
'whatsapp',
'paypal',
'are there bugs in peanut butter',
'are the chrisleys going to jail',
'are there snakes in hawaii',
'are the lakers in the playoffs',
'are there snakes in ireland',
'are there crocodiles in florida',
'are the celtics playing tonight',
'are the chrisleys in jail 2022',
'are the dodgers playing today',
'can the president declare war',
'can the lakers make the playoffs',
'can the constitution be changed',
'can therapists diagnose',
'can the warden break blocks',
'can the liver heal itself',
'can the master sword break',
'can the liver regenerate',
'can they be singular',
'can the subaltern speak',
'did the warriors win',
'did the celtics win',
'did the yankees win today',
'did the bucks win',
'did they find harmony montgomery',
'did the lakers make the playoffs',
'did the cubs win today',
'did the suns win',
'did the mavericks win',
'did the celtics win tonight',
'do the right thing',
'do the roar',
'do the roar kid',
'do the warriors play tonight',
'do the astros play today',
'do the celtics play tonight',
'do the yankees play today',
'do the right thing cast',
'do the dodgers play today',
'do the cubs play today',
'does the moon rotate',
'does the sun rise in the east',
'does the sun rotate',
'does the moon have an atmosphere',
'does the sun move',
'does the moon orbit the earth',
'does the president control inflation',
'does the moon have gravity',
'does theo die in first kill',
'had the time of my life',
'had the gall',
'had the pleasure synonym',
'had the opportunity synonym',
'had thesaurus',
'had the gall to say',
'had the support of president johnson',
'had the pleasure of meeting',
'had the radish',
'have they found harmony montgomery',
'have they found anything on oak island',
'have they found dylan rounds',
'have the browns won a superbowl',
'have the day you deserve',
'have the suns won a championship',
'have thesaurus',
'have their cake and eat it too',
"have they found noah's ark",
'have the knicks won a championship',
'how the grinch stole christmas',
'how the west was won',
'how the market works',
'how the universe works',
'how the word is passed',
'how the turntables',
'how the west was won cast',
'is there a full moon tonight',
'is there school tomorrow',
'is the stock market crashing',
'is the flu contagious',
'is the iphone 13 waterproof',
'is the quarry multiplayer',
'is the batman on hbo max',
'is the us in a recession',
'should the electoral college be abolished',
'should the be capitalized in a title',
'should there be a comma before and',
'should the voting age be lowered',
'was there an earthquake just now',
'was there a stimulus check in 2021',
'was the trojan war real',
'was the titanic real',
'was the trojan horse real',
'was the new deal a success',
'was theodore roosevelt a good president',
'was the marshall plan successful',
'was there a tornado last night',
'was the joker in the batman',
'what the font',
'what the weather',
'what the dog doin',
'what the health',
'what the weather today',
'what the meaning of this symbol',
'what the weather tomorrow',
'what the world needs now',
'when they see us',
'when the imposter is sus',
'when the levee breaks',
"when the party's over lyrics",
'when the wind blows',
'when the saints go marching in',
'when the game stands tall',
'when the bough breaks',
'when they cry',
'when the sun goes down',
'where the crawdads sing',
'where the wild things are',
'where the red fern grows',
'where the heart is',
'where the sidewalk ends',
'where the crawdads sing book',
'where the crawdads sing trailer',
'where the crawdads sing summary',
'which theme is addressed in both excerpts',
'which there to use',
'which the batman character are you',
'which theragun to buy',
'which the wilds character are you',
'which thermometer is most accurate',
'which the office character are you',
'which theme is common to the two excerpts',
'which theme is best reinforced by the excerpt',
'which theme is best supported by the passage',
'who the richest man in the world',
'who the boss',
'who the oldest person in the world',
'who the tallest person in the world',
'who the richest rapper',
'who the boss cast',
'who the strongest anime character',
'who the smartest person in the world',
'who the son sets free',
"who's the boss",
"who's the richest person in the world",
"who's the boss cast",
"who's the tallest person in the world",
"who's the oldest person in the world",
"who's the oldest person alive",
"who's the killer in scream 2022",
"who's the smartest person in the world",
"who's there",
'why the caged bird sings',
'why the long face',
'why the sky is blue',
'why the electoral college is bad',
'why the american dream is unattainable',
'why the flooding in yellowstone',
'will there be a season 7 of peaky blinders',
'will the housing market crash',
'will there be a season 5 of stranger things',
'will there be a season 5 of ozark',
'will there be a season 4 of barry',
'will there be a recession',
'will there be a season 6 of the last kingdom',
'will the stock market recover',
'will there be a season 3 of bridgerton',
'will there be a season 5 of all american',
'would the us defend taiwan',
'would theranos have worked',
'would there be a draft for ww3',
'would these arms be in your way',
'would there be a draft for war with russia',
'would thesaurus',
'would the fbi text you',
'would there ever be another draft',
'what is my ip',
'where is my train',
'where is my refund',
'where is my stimulus check',
'what is my location',
'how much is my car tax',
'is my show cancelled',
'what is my chart',
'is my switch patched',
'what is my hero academia on',
'is my a pronoun',
'is my apple watch waterproof',
'is my ankle sprained or broken',
'is my alberta digital id down',
'is my a noun',
'is my antidepressant working quiz',
'is my alberta digital id legit',
'is my android phone unlocked',
'is my abandonware safe',
'is my an adjective',
'a is my life line dp',
'a is my life images',
'is my baby teething',
'is my baby sick or teething',
'is my blueberry bush dead',
'is my baby sleeping too much',
'is my baby cold at night',
'because this is my first life',
'bon jovi is my life',
'boy is my',
'birthstone what is my',
'beatles this is my life',
'breed is my cat',
'birthday when is my birthday',
'baby is my love',
'breed is my dog',
'blue is my mind',
'is my cat depressed',
'is my computer 32 or 64',
'is my cat in heat',
'is my crc legit',
'is my computer on',
'is my car registered',
'is my cat stressed',
'chords this is my life',
'check where is my stimulus check',
'check how much is my car tax',
'chinese zodiac what is my',
'check where is my refund',
'coronavirus is my area',
'ca when is my turn',
'congressman who is my',
'check what is my ip',
'check where is my car',
'is my dog depressed',
'is my dress up darling finished',
'is my dog happy',
'is my dress up darling manga finished',
'is my dress up darling good',
'is my dog sick',
'is my dog in pain',
'is my dog dying quiz',
'dolemite is my name',
'dude where is my car',
'dvla is my car insured',
'download where is my train app',
'design graphic is my passion',
'dvla how much is my car tax',
'dude where is my car cast',
'dr alban is my life',
'demon queen is my wife',
'dancing is my room',
'is my email pwned',
'is my email on a spam list',
'is my employer a gst/hst registrant',
'is my email blacklisted',
'is my equifax legit',
'is my essay good checker',
'is my eye prescription bad',
'is my eta still valid',
'enemy of my enemy is my friend',
'elevation what is my',
'ethnicity what is my',
'electricity supplier who is my',
'eye is my eye twitching',
'en is my mot due',
'email id what is my email id',
'eminem my name is my name is',
'email address what is my email address',
'is my fitness pal free',
'is my foot broken',
'is my friend a sociopath quiz',
'is my fitbit waterproof',
'is my face symmetrical',
'is my foot broken or sprained',
'is my flight on time',
'for the joy of the lord is my strength',
'for the lord is my shepherd',
'federal where is my refund',
'from where is my ip',
'for this is my body',
'frank sinatra this is my way',
'find where is my train',
'find is my phone',
'family is my',
'find is my device',
'is my girl',
'is my girl on netflix canada',
'is my girl on netflix',
'is my ground beef bad',
'is my grass dead or dormant',
'is my graphics card dying',
'graphic design is my passion',
'google what is my location',
'google what is my password',
'ga where is my refund',
'green is my valley',
'google is my activity',
'google what is my account',
'google maps what is my location',
'google what is my account password',
'is my hero academia over',
'is my hero academia on netflix',
'is my hydrangea dead',
'is my hair thinning',
'is my hairline receding',
'is my hero academia manga over',
'is my hand broken quiz',
'how much is my stimulus check',
'how much is my social security',
'how is my ip',
'how much is my refund',
'mypay',
'how much is my pillow',
'how much is my heritage',
'how is my train',
'how is my location',
'is my internet working',
'is my iphone unlocked',
'is my iphone hacked',
'is my internet down',
'is my iphone 12 waterproof',
'is my ip blacklisted',
'is my ipad too old to update',
'is my iphone listening to me',
'is my iphone waterproof',
'is my instagram hacked',
'irs where is my refund',
'ip address what is my',
'it is my love',
'it is my way',
'i boy is my',
'id is my gangnam beauty',
'it is my school',
'is my train where is my train',
'it is my name',
'it is my girl',
'is my juniper dead',
'is my japanese maple dead',
'is my juniper bonsai dead',
'is my jam',
'is my javascript enabled',
'is my jeep a jk or jl',
'is my jaw out of place',
'is my java up to date',
'is my jam synonym',
'joy of the lord is my strength',
'jesus christ is my nga',
'jesus is my friend',
'jesus is my',
'joy of the lord is my strength kjv',
'jesus is my savior',
'jesus is my saviour',
'judas is my mind',
'justin bieber this is my world',
'jesus christ is my living hope',
'is my kitten overweight',
'is my keyboard working',
'is my koodo phone unlocked',
'is my kitten in heat',
'is my kitten sick',
'is my kinder egg recall',
'is my kitten teething',
'is my knee dislocated',
'is my knee pain serious',
'kim is my lawyer',
'kindness is my superpower',
'keep calm tomorrow is my birthday',
'keep calm today is my birthday',
'kbb how much is my car worth',
'kjv the joy of the lord is my strength',
'kale is my name',
'kiskis is my boyfriends are mint candies',
'ky where is my refund',
'kingdom come this is my kingdom come',
'is my lavender dead',
'is my little pony on disney plus',
'is my license suspended',
'is my little pony over',
'is my laptop 64 bit',
'is my life a dream',
'is my laptop compatible with windows 11',
'is my little pony disney',
'is my little pony on netflix',
'is my life',
'lord is my shepherd',
'love is my life',
'lyrics this is my life',
'lyrics where is my love',
'love is my message',
'land is my land',
'love is my love',
'love is my life lyrics',
'location what is my location',
'life is my life',
'is my mac too old to update',
'is my marriage over',
'is my mole cancerous',
'is my mic working',
'is my mini rose plant dead',
'm is my life images',
'm is my life line',
'm is my life wallpaper',
'is my newborn sleeping too much',
'is my nose piercing infected',
'is my number blocked',
'is my nose big',
'is my neighbor totoro sad',
'is my newborn eating too much',
'is my newborn sick',
'is my name written there',
'is my newborn hungry',
'nys where is my refund',
'nj where is my refund',
'nc where is my refund',
'nobody is my name',
'nhs what is my nhs number',
'now where is my location',
'north node what is my',
'no doubt is my life',
'is my orchid dead',
'is my oven in f or c',
'is my oven celsius or fahrenheit',
'is my outlook calendar visible to others',
'is my office 32 or 64',
'is my ornamental grass dead',
'is my oc good',
'is my oven gas or electric',
'is my phone unlocked',
'is my port open',
'is my phone hacked',
'is my phone listening to me',
'is my puppy aggressive quiz',
'is my pc vr ready',
'is my phone waterproof',
'is my phone locked',
'is my password strong',
'pixies where is my mind',
'pledge india is my country',
'pa where is my refund',
'piano where is my mind',
'popular is my birthday',
'pokemon how much is my card worth',
'phone number what is my phone number',
'passport where is my',
'patronus what is my',
'public ip address what is my',
'is my quarter valuable',
'is my queen',
'is my qashqai j10 or j11',
'is my qualification recognised in australia',
'is my quartz real',
'is my quarter silver',
'is my qantas flight on time',
'is my quiche done',
'is my q free',
'quilting is my therapy',
'quiz what is my spirit animal',
'quiz what is my aesthetic',
'quiz what is my love language',
'quiz who is my soulmate',
'quiz what is my zodiac sign',
'quiz what is my style',
'queen love is my life',
'is my rose bush dead',
'is my rapid test positive',
'is my registration expired',
'is my russian sage dead',
'rihanna what is my name',
'rare is my birthday',
'rascal flatts this is my wish',
'rising sign what is my',
'riding is my life',
'robot is my boyfriend',
'refund where is my refund',
'resolution what is my',
'ringtone is my name',
'is my spouse a dependent canada',
'is my strange addiction real',
'is my service canada down',
'is my sore throat covid',
'is my story animated real',
'is my site down',
'is my spouse a dependent',
'is my snail dead',
"where's my stimulus payment",
'state where is my refund',
'story is my life',
'spirit animal what is my',
'strength of the lord is my joy',
'sun sign what is my',
'story is my life lyrics',
'show is my ip',
'song this is my girl',
'she is my out of league',
'is my toe broken',
'is my toe broken quiz',
'is my tattoo infected',
'is my tv show cancelled',
'is my tooth infected',
'is my thumb broken quiz',
'is my tv 4k',
'is my thyroid working',
'is my tattoo supposed to peel',
'that is my boy',
'this is my school',
'that is my girl',
'this is my way',
'today weather is my location',
'this is my name',
'this is my ip',
'train where is my train',
'this is my location',
'tax is my car',
'is my understanding correct',
'is my username taken',
'is my uk pension taxable in canada',
'is myus legit',
'is my upass active',
'is my unorthodox life real',
'is my usb bootable',
'is my unit rent controlled',
'usps where is my package',
'usps why is my package still in transit',
'utah where is my refund',
'uk how much is my house worth',
'user agent what is my',
'ulez is my car compliant',
'ups is my package lost',
'uk how much is my car worth',
'us representative who is my',
'uscis where is my case status',
'is my vehicle registered',
'is my vyvanse dose too low',
'is my vpn working',
'is my voice deep',
'is my versa 2 waterproof',
'is my vyvanse dose too high',
'is my vaccine certificate valid',
'is my vision getting worse',
'is my venus fly trap dead',
'is my voice annoying',
'verse the joy of the lord is my strength',
'vaccine when is my turn',
'va where is my refund',
'verse the lord is my shepherd',
'virgin media is my area down',
'vampire is my babysitter',
'viewport what is my',
'venus sign what is my',
'verizon how much is my bill',
'vpn what is my ip address',
'is my wifi working',
'is my windows 32 or 64',
'is my wrist broken or sprained',
'is my wifi 2.4 or 5',
'is my water leaking',
'is my weight healthy',
'is my wife a dependent',
'is my website down',
'is my wifi down',
'why is my eye twitching',
'why is my computer so slow',
'why is my tongue white',
'why is my hair falling out',
'why is my dog shaking',
'why is my nose always runny',
'why is my stomach growling so much',
'is my xbox controller bluetooth',
'is my xbox controller under warranty',
'xfinity is my internet down',
'xfinity why is my internet so slow',
'xfinity what is my wifi password',
'xfinity why is my wifi not working',
'xbox why is my mic not working',
'xfinity why is my wifi so slow',
'xfinity what is my password',
'xfinity what is my plan',
'xbox one this land is my land',
'xbox why is my latency so high',
'is my yeast dead',
'is my yucca plant dead',
'is my yeast good',
'is my yeti dishwasher safe',
'is my year of rest and relaxation satire',
'is my yogurt bad',
'is my yew dead',
'is my youtube history public',
'is my year of rest and relaxation true',
'your wish is my command',
'your love is my drug',
'your wish is my command meaning',
'your love is my love',
'you world is my world',
'your bestie is my bestie',
"you're my best friend",
'you is my life',
'is my zucchini bad',
'is my zebra grass dead',
'is my zip code my postal code',
'is my zoloft dose too high',
'is my zoom link always the same',
'is my zoloft working',
'is my zip code',
'is my zoom up to date',
'is my zodiac sign wrong',
'is my zodiac sign my sun sign',
'zoopla how much is my house worth',
'zillow how much is my house worth',
'zoom webinar is my camera on',
'zoom where is my recording',
'zero is my hero',
'zodiac what is my rising sign',
'zoom why is my camera not working',
'zip code what is my zip code',
'zodiac sign what is my zodiac sign',
'zucchero she is my baby',
'is my 02 phone unlocked',
'is my 02 sensor bad',
'is my 02 down',
'is the 02 open',
'is the 0345 number free',
'is the 0300 number free',
'is the 0 or 1 on',
'is the 02 in greenwich',
'is the 0333 number free',
'is the 05 council a scp',
'02 what is my number',
'02 why is my data not working',
'02 when is my upgrade',
'0 is my hero',
'0 is my favorite number',
'my hair is green 015b',
'what is my 02',
'how much is my 02 bill',
'is my 10 nights legit',
'my blood sugar level is 17',
'my haemoglobin count is 114',
'what is my 11 circle',
'what is 100 days my prince about',
'what is covid 19 in my area',
'the world is my 17',
'what is my team 11',
'windows 10 where is my computer',
'is my 2 month old teething',
'is my 2 year old gifted',
'is my 2 year old color blind',
'is my 2 year old advanced',
'is my 2 year old done with naps',
'2019 where is my refund',
'2021 where is my stimulus check',
'2 is my favorite 1',
'2 is my lucky number',
'2 is my favorite number',
'2 is my favorite 1 lyrics',
'my vision for india is 2047',
'cyberpunk 2077 where is my mind',
'my girlfriend is alien season 2',
'is my 3 month old teething',
'is my 3 year old teething',
'is my 3 month old sick',
'is my 3060 lhr',
'is my 3 month old eating enough',
'is my 3rd eye open quiz',
'32-bit or 64-bit which is my computer',
'3 is my favorite number',
'3 is my lucky number',
'3 is my life path number',
'3 is my phone unlocked',
"where's my 3rd stimulus check",
'where is my office 365',
'my wife is a gangsters 3',
'is my 4 year old ready for kindergarten',
'4th dimension is my own mansion',
'4 is my favorite number',
'4 is my lucky number',
'my prolactin level is 40',
'why is my 4g not working',
'is bsnl 4g available in my area',
'54 46 is my number',
'54-46 is my number lyrics',
'is my 5 year old gifted',
'is my 5 month old teething',
'is my 5 free',
'is my 50p worth anything',
'50 cent is my birthday',
'5446 is my number',
'5 is my favorite number',
'is 5g available in my area',
'when is 5g coming to my area',
'what is my 5km radius',
'what is 50 mile radius from my location',
'my name is earl season 5',
'is my 600 lb life on netflix',
'is my 600 pound life coming back',
'is my 6 year old depressed',
'is my 600 lb life cancelled',
'is my 600 pound life on netflix',
'is my 6 month old teething',
'is my 600 pound life real',
'6 is my lucky number',
'6 is my favorite number',
'where is my 600 stimulus check',
'how is lacey from my 600 lb life',
'what is my 600 pound life on',
'how is cindy from my 600 lb life',
'where is samantha from my 600 lb life',
'where is jt my 600 lb life',
'where is amber from my 600 lb life',
'is my 7 week old teething',
'is my 7 month old teething',
'is my 7 year old dyslexic',
'is my 7 month old sleeping too much',
'7 is my lucky number',
'7 is my favorite number',
'7 is my life path number',
'7 is my angel number',
'my cholesterol is 7.1 is that high',
'what is my 7th house',
'my dog is 73 days pregnant',
'windows 7 where is my computer',
'is my 8 week old teething',
'is my 8 month old teething',
'is my 8 month old saying mama',
'8 is my favorite number',
'8 is my lucky number',
'8 is my life path number',
'what is my 8th house',
'what is 1-800-my-apple',
'mysql 8 where is my.ini',
'my roommate is a gumiho ep 8',
'is my 9 month old eating enough',
'is my 9 week old teething',
'is my 9 month old behind',
'is my 9 month old sleeping too much',
'is my 9 month old dehydrated',
'9 is my lucky number',
'9 is my life path number',
'9 is my angel number',
'9 is my favorite number',
'my temperature is 96',
'my temperature is 95',
'my oxygen level is 95',
'my resting heart rate is 90'
]

# Adapted from gen_phone code above to generate IP addresses
def gen_ip():
    """Return a random dotted-quad string made of four zero-padded octets.

    Each octet is drawn from 0-255 (inclusive) and left-padded with zeros to
    three characters, e.g. ``'007.120.255.031'``, so every generated value
    has the same width.

    The retry loop inherited from ``gen_phone`` was removed: it compared a
    3-character octet against 4-character strings, so it could never execute
    (and, had it run, it would have produced values far outside 0-255).
    The returned strings are unchanged by its removal.

    NOTE(review): zero-padded octets are kept for backward compatibility
    with the already-exported dataset, but they are not canonical IPv4
    notation and strict parsers may reject them -- confirm this is intended.
    """
    # Four independent draws, in first-to-last octet order.
    octets = [str(random.randint(0, 255)).zfill(3) for _ in range(4)]
    return '.'.join(octets)

# Generate smaller pools to randomly choose the larger dataset from, so that
# IP addresses and phone numbers repeat across rows.
# NOTE: both pools are drawn *before* random.seed(101) below, so their
# contents depend on the RNG state left by the cells run earlier.
IPs_list = [gen_ip() for _ in range(100)]
phones_list = [gen_phone() for _ in range(75)]

# Build dataset: 1000 rows sampled (with replacement) from the pools above
random.seed(101)
searchDf = pd.DataFrame(columns=['IP Address', 'Phone Number', 'Search History'])
searchDf['IP Address'] = random.choices(IPs_list, k=1000)
searchDf['Phone Number'] = random.choices(phones_list, k=1000)
searchDf['Search History'] = random.choices(searches, k=1000)

# Add clues: rows carrying the clue owner's phone number (petDf row 22).
# Each entry is (number of rows, search text); None means "pick a random
# search" so the clue phone number also shows up next to ordinary searches.
clue_phone = petDf['Owner Phone Number'].iloc[22]
clue_plan = [
    (10, None),
    (2, 'is my blood pressure high'),
    (3, 'symptoms of high blood pressure'),
    (3, 'what is the NHANES study?'),
]
for clue_count, fixed_search in clue_plan:
    for _ in range(clue_count):
        # Draw order (IP, optional random search, row position) matches the
        # original statement so seeded output is preserved.
        clue_ip = gen_ip()
        clue_search = random.choice(searches) if fixed_search is None else fixed_search
        # random.randint includes BOTH endpoints, so the upper bound must be
        # len(searchDf) - 1; using len(searchDf) could select a position one
        # past the last row and make .iloc raise IndexError.
        # Positions 0-4 are never chosen.
        # NOTE: positions are drawn independently, so two clue rows can land
        # on the same position and overwrite each other.
        clue_pos = random.randint(5, len(searchDf) - 1)
        searchDf.iloc[clue_pos] = [clue_ip, clue_phone, clue_search]
searchDf

In [None]:
# Verify the clues exist in the dataframe: show every row whose phone number
# equals '834-352-0013', the number stored in the clue row of petDf.
searchDf[searchDf['Phone Number']=='834-352-0013']

In [None]:
# Export the search-history dataset to CSV; index=False keeps the row index
# out of the file.
searchDf.to_csv('data/InternetSearchLeak.csv', index=False)

In [12]:
# Tiny example frame with one numeric, one boolean and one text column
d = dict(
    A=[1, 2, 3, 4],
    B=[True, True, False, True],
    C=['Cat', 'Dog', 'Snake', 'Cat'],
)
sample = pd.DataFrame(d)
sample

Unnamed: 0,A,B,C
0,1,True,Cat
1,2,True,Dog
2,3,False,Snake
3,4,True,Cat


In [14]:
# Boolean-mask filter: keep only the rows where column C equals 'Cat'
sample[sample['C']=='Cat']

Unnamed: 0,A,B,C
0,1,True,Cat
3,4,True,Cat


In [15]:
# Write the sample frame to CSV without the row index
sample.to_csv('data/sample.csv', index=False)

In [51]:
# Load the two NHANES SAS transport files, keeping only the columns used below
med = pd.read_sas('data/P_BPXO.xpt')[['SEQN', 'BPXOSY1']]
dem = pd.read_sas('data/P_DEMO.xpt')[['SEQN', 'RIDAGEYR', 'RIAGENDR', 'DMDMARTZ']]

# Join the two tables on the shared SEQN column and give the columns
# readable names
both = pd.merge(med, dem, on='SEQN').rename(columns={
    'SEQN': 'ID',
    'BPXOSY1': 'Systolic BP',
    'RIAGENDR': 'Sex',
    'DMDMARTZ': 'Marital Status',
    'RIDAGEYR': 'Age',
})

# Decode the numeric sex codes. The result is assigned back rather than
# using replace(inplace=True) on the selected column: the in-place form acts
# on a temporary Series, which triggers chained-assignment warnings and
# leaves the frame unchanged when pandas Copy-on-Write is active.
both['Sex'] = both['Sex'].replace({1.0: 'Male', 2.0: 'Female'})

# Keep only rows whose marital-status code is below 5; rows with a missing
# code fail the comparison and are dropped as well.
# NOTE(review): codes >= 5 are presumably "refused"/"don't know" -- confirm
# against the NHANES codebook.
# .copy() makes the filtered result an independent frame so the column
# assignment below is unambiguous.
both = both[both['Marital Status'] < 5].copy()
both['Marital Status'] = both['Marital Status'].replace({
    1.0: 'Married/Living with Partner',
    2.0: 'Widowed/Divorced/Separated',
    3.0: 'Never married',
})

# Drop rows with any remaining missing value
both = both.dropna()

# Display the records matching the attributes used in the activity:
# systolic BP above 140, male, married/living with partner, younger than 30
both[(both['Systolic BP'] > 140) &
     (both['Sex'] == 'Male') &
     (both['Marital Status'] == 'Married/Living with Partner') &
     (both['Age'] < 30)]

Unnamed: 0,ID,Systolic BP,Age,Sex,Marital Status
4100,114782.0,141.0,24.0,Male,Married/Living with Partner
8487,120678.0,144.0,29.0,Male,Married/Living with Partner
8990,121358.0,152.0,29.0,Male,Married/Living with Partner
9890,122526.0,144.0,23.0,Male,Married/Living with Partner
11292,124347.0,158.0,26.0,Male,Married/Living with Partner


In [50]:
# Export the cleaned NHANES subset to CSV without the row index
both.to_csv('data/nhanes.csv', index=False)